Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 36 for crawling (0.28 sec)

  1. README.md

            
            // Configure crawling parameters
            crawler.addUrl("https://example.com");
            crawler.crawlerContext.setMaxAccessCount(100);
            crawler.crawlerContext.setNumOfThread(5);
            crawler.urlFilter.addInclude("https://example.com/.*");
            
            // Execute crawling
            String sessionId = crawler.execute();
            System.out.println("Crawling completed. Session ID: " + sessionId);
        }
    }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/log/LogType.java

     * Each enum constant represents a specific event or state in the crawler's execution.
     */
    public enum LogType {
        /** Indicates the start of a crawling process. */
        START_CRAWLING,
        /** Indicates the cleanup phase of crawling. */
        CLEANUP_CRAWLING,
        /** Indicates an unsupported URL was encountered when crawling started. */
        UNSUPPORTED_URL_AT_CRAWLING_STARTED,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/LogHelperImpl.java

            if (e.isDebugEnabled()) {
                logger.debug("Crawling Access Exception at {}", urlQueue.getUrl(), e);
            } else if (e.isInfoEnabled()) {
                logger.info(e.getMessage());
            } else if (e.isWarnEnabled()) {
                logger.warn("Crawling Access Exception at " + urlQueue.getUrl(), e);
            } else if (e.isErrorEnabled()) {
                logger.error("Crawling Access Exception at " + urlQueue.getUrl(), e);
            }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    import jakarta.annotation.Resource;
    
    /**
     * The Crawler class is the main class for web crawling. It manages the crawling process,
     * including adding URLs to the queue, filtering URLs, managing crawler threads,
     * and handling the overall crawling lifecycle.
     *
     * <p>It implements the Runnable interface to be executed in a separate thread,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RequestData.java

    import java.util.Objects;
    
    import org.codelibs.fess.crawler.Constants;
    
    /**
     * Represents a request data for crawling.
     * This class encapsulates the HTTP method, URL, and weight associated with a crawling request.
     */
    public class RequestData {
        /**
         * HTTP methods supported for crawling requests.
         */
        public enum Method {
            /** HTTP GET method. */
            GET,
            /** HTTP POST method. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/ResponseProcessor.java

    /**
     * The ResponseProcessor interface defines a contract for processing response data.
     * Implementations of this interface are responsible for handling the response data
     * obtained during a crawling process.
     */
    public interface ResponseProcessor {
    
        /**
         * Processes the given response data.
         *
         * @param responseData the response data to be processed
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.1K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/AbstractIntervalController.java

                }
            }
        }
    
        /**
         * Delays the crawling process before processing a URL.
         */
        protected abstract void delayBeforeProcessing();
    
        /**
         * Delays the crawling process after processing a URL.
         */
        protected abstract void delayAfterProcessing();
    
        /**
         * Delays the crawling process when there are no URLs in the queue.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        /**
         * Filter for URLs to control which URLs are processed.
         */
        protected UrlFilter urlFilter;
    
        /**
         * Manager for crawling rules and configurations.
         */
        protected RuleManager ruleManager;
    
        /**
         * Controller for managing crawling intervals and delays.
         */
        protected IntervalController intervalController;
    
        /**
         * Set of robots.txt URLs that have been processed.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CrawlingParameterUtil.java

    import org.codelibs.fess.crawler.service.DataService;
    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    /**
     * Utility class for managing crawling parameters using ThreadLocal variables.
     * This class provides methods to set and get various parameters related to the crawling process.
     *
     * <p>This class is final and cannot be instantiated.</p>
     *
     * <p>The following parameters are managed:</p>
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.4K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

     *   <li>Handling exceptions that may occur during the crawling process.</li>
     * </ol>
     *
     * <p>
     * The thread also manages the active thread count using {@code crawlerContext.activeThreadCountLock}
     * and provides methods for logging messages using {@link LogHelper}.
     * </p>
     *
     * <p>
     * The crawling process continues until the crawler status is {@link CrawlerStatus#DONE} or the
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
Back to top