- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 36 for crawling (0.28 sec)
-
README.md
// Configure crawling parameters crawler.addUrl("https://example.com"); crawler.crawlerContext.setMaxAccessCount(100); crawler.crawlerContext.setNumOfThread(5); crawler.urlFilter.addInclude("https://example.com/.*"); // Execute crawling String sessionId = crawler.execute(); System.out.println("Crawling completed. Session ID: " + sessionId); } }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/log/LogType.java
* Each enum constant represents a specific event or state in the crawler's execution. */ public enum LogType { /** Indicates the start of a crawling process. */ START_CRAWLING, /** Indicates the cleanup phase of crawling. */ CLEANUP_CRAWLING, /** Indicates an unsupported URL was encountered when crawling started. */ UNSUPPORTED_URL_AT_CRAWLING_STARTED,
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/LogHelperImpl.java
if (e.isDebugEnabled()) { logger.debug("Crawling Access Exception at {}", urlQueue.getUrl(), e); } else if (e.isInfoEnabled()) { logger.info(e.getMessage()); } else if (e.isWarnEnabled()) { logger.warn("Crawling Access Exception at " + urlQueue.getUrl(), e); } else if (e.isErrorEnabled()) { logger.error("Crawling Access Exception at " + urlQueue.getUrl(), e); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java
import org.codelibs.fess.crawler.service.UrlQueueService; import jakarta.annotation.Resource; /** * The Crawler class is the main class for web crawling. It manages the crawling process, * including adding URLs to the queue, filtering URLs, managing crawler threads, * and handling the overall crawling lifecycle. * * <p>It implements the Runnable interface to be executed in a separate thread,
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RequestData.java
import java.util.Objects; import org.codelibs.fess.crawler.Constants; /** * Represents a request data for crawling. * This class encapsulates the HTTP method, URL, and weight associated with a crawling request. */ public class RequestData { /** * HTTP methods supported for crawling requests. */ public enum Method { /** HTTP GET method. */ GET, /** HTTP POST method. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/ResponseProcessor.java
/** * The ResponseProcessor interface defines a contract for processing response data. * Implementations of this interface are responsible for handling the response data * obtained during a crawling process. */ public interface ResponseProcessor { /** * Processes the given response data. * * @param responseData the response data to be processed */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/AbstractIntervalController.java
} } } /** * Delays the crawling process before processing a URL. */ protected abstract void delayBeforeProcessing(); /** * Delays the crawling process after processing a URL. */ protected abstract void delayAfterProcessing(); /** * Delays the crawling process when there are no URLs in the queue. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java
/** * Filter for URLs to control which URLs are processed. */ protected UrlFilter urlFilter; /** * Manager for crawling rules and configurations. */ protected RuleManager ruleManager; /** * Controller for managing crawling intervals and delays. */ protected IntervalController intervalController; /** * Set of robots.txt URLs that have been processed.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 8.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CrawlingParameterUtil.java
import org.codelibs.fess.crawler.service.DataService; import org.codelibs.fess.crawler.service.UrlQueueService; /** * Utility class for managing crawling parameters using ThreadLocal variables. * This class provides methods to set and get various parameters related to the crawling process. * * <p>This class is final and cannot be instantiated.</p> * * <p>The following parameters are managed:</p> * <ul>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 6.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java
* <li>Handling exceptions that may occur during the crawling process.</li> * </ol> * * <p> * The thread also manages the active thread count using {@code crawlerContext.activeThreadCountLock} * and provides methods for logging messages using {@link LogHelper}. * </p> * * <p> * The crawling process continues until the crawler status is {@link CrawlerStatus#DONE} or the
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 20.4K bytes - Viewed (0)