Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 4 of 4 for crawl (0.04 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

        }
    
        /**
         * Gets the crawl delay value for the specified user agent from robots.txt.
         * The crawl delay specifies the time (in seconds) to wait between successive requests.
         *
         * @param userAgent The user agent string to match against robots.txt directives
         * @return The crawl delay value in seconds. Returns 0 if no matching directive is found
         *         or no crawl delay is specified for the matching directive.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  2. README.md

    </components>
    ```
    
    ### Crawler Context Configuration
    
    ```java
    // Set maximum number of URLs to crawl
    crawler.crawlerContext.setMaxAccessCount(1000);
    
    // Set number of crawler threads
    crawler.crawlerContext.setNumOfThread(10);
    
    // Set maximum crawl depth
    crawler.crawlerContext.setMaxDepth(3);
    
    // Set request interval (politeness)
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/fs/FileSystemClient.java

    import org.codelibs.fess.crawler.helper.ContentLengthHelper;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    
    import jakarta.annotation.Resource;
    
    /**
     * FileSystemClient is CrawlerClient implementation to crawl files on a file
     * system.
     *
     * @author shinsuke
     *
     */
    public class FileSystemClient extends AbstractCrawlerClient {
    
        /** Logger instance for this class */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

         */
        protected CrawlerClient getClient(final String url) {
            return clientFactory.getClient(url);
        }
    
        /**
         * Checks if the content has been updated since the last crawl.
         * @param client The crawler client.
         * @param urlQueue The URL queue entry.
         * @return true if content is updated, false otherwise.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
Back to top