Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 31 - 40 of 60 for processor (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                httpClientPropertyMap.put(name, value);
            }
        }
    
        /**
         * Processes robots.txt for the given URL.
         * This method fetches and parses the robots.txt file to extract disallow/allow rules
         * and sitemap information.
         *
         * @param url The URL to process robots.txt for
         */
        protected void processRobotsTxt(final String url) {
            if (StringUtil.isBlank(url)) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/MimeTypeException.java

    package org.codelibs.fess.crawler.exception;
    
    /**
     * MimeTypeException is a custom exception class that extends CrawlerSystemException.
     * It is used to indicate exceptions related to MIME type handling during the crawling process.
     * This exception can be thrown with a message, a cause, or both.
     */
    public class MimeTypeException extends CrawlerSystemException {
    
        private static final long serialVersionUID = 1L;
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExecutionTimeoutException.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * An exception indicating that the execution of a process has timed out.
     * This exception extends {@link org.codelibs.fess.crawler.exception.ExtractException}.
     *
     */
    public class ExecutionTimeoutException extends ExtractException {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/fs/FileSystemClient.java

         */
        @Override
        public ResponseData doGet(final String uri) {
            return processRequest(uri, true);
        }
    
        /**
         * Processes a request for the given URI.
         *
         * @param uri the URI to process
         * @param includeContent whether to include content in the response
         * @return the response data
         * @throws CrawlingAccessException if the request fails
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.8K bytes
    - Viewed (0)
  5. README.md

    ```
    
    ### Monitoring
    
    ```java
    // Monitor crawling progress
    while (crawler.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
        int processed = dataService.getCount(sessionId);
        System.out.println("Processed: " + processed + " URLs");
        Thread.sleep(5000);
    }
    ```
    
    ## Contributing
    
    We welcome contributions to Fess Crawler! Please follow these guidelines:
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CrawlingParameterUtil.java

     * This class provides methods to set and get various parameters related to the crawling process.
     *
     * <p>This class is final and cannot be instantiated.</p>
     *
     * <p>The following parameters are managed:</p>
     * <ul>
     *   <li>{@link UrlQueue} - The queue of URLs to be crawled.</li>
     *   <li>{@link CrawlerContext} - The context of the current crawling process.</li>
     *   <li>{@link UrlQueueService} - The service for managing the URL queue.</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.4K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExtractException.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * Exception thrown during the extraction process in the crawler.
     * This exception indicates a failure or error that occurred while extracting content from a crawled resource.
     * It extends {@link org.codelibs.fess.crawler.exception.CrawlerSystemException} and provides constructors
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/UrlQueue.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.entity;
    
    /**
     * The UrlQueue interface represents a queue of URLs to be processed by a web crawler.
     * It provides methods to get and set various properties of a URL queue entry.
     *
     * @param <IDTYPE> the type of the identifier for the URL queue entry
     */
    public interface UrlQueue<IDTYPE> {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 4.3K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/CrawlingAccessException.java

     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * CrawlingAccessException is an exception class that represents an issue encountered while accessing a resource during the crawling process.
     * It extends CrawlerSystemException and provides functionality to set and check the log level for the exception.
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        /**
         * Current status of the crawler.
         */
        protected volatile CrawlerStatus status = CrawlerStatus.INITIALIZING;
    
        /**
         * Filter for URLs to control which URLs are processed.
         */
        protected UrlFilter urlFilter;
    
        /**
         * Manager for crawling rules and configurations.
         */
        protected RuleManager ruleManager;
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
Back to top