- Sort Score
- Result 10 results
- Languages All
Results 31 - 40 of 60 for processor (0.03 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
httpClientPropertyMap.put(name, value); } } /** * Processes robots.txt for the given URL. * This method fetches and parses the robots.txt file to extract disallow/allow rules * and sitemap information. * * @param url The URL to process robots.txt for */ protected void processRobotsTxt(final String url) { if (StringUtil.isBlank(url)) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 52.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/MimeTypeException.java
package org.codelibs.fess.crawler.exception; /** * MimeTypeException is a custom exception class that extends CrawlerSystemException. * It is used to indicate exceptions related to MIME type handling during the crawling process. * This exception can be thrown with a message, a cause, or both. */ public class MimeTypeException extends CrawlerSystemException { private static final long serialVersionUID = 1L; /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExecutionTimeoutException.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; /** * An exception indicating that the execution of a process has timed out. * This exception extends {@link org.codelibs.fess.crawler.exception.ExtractException}. * */ public class ExecutionTimeoutException extends ExtractException {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/fs/FileSystemClient.java
*/ @Override public ResponseData doGet(final String uri) { return processRequest(uri, true); } /** * Processes a request for the given URI. * * @param uri the URI to process * @param includeContent whether to include content in the response * @return the response data * @throws CrawlingAccessException if the request fails */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 13.8K bytes - Viewed (0) -
README.md
``` ### Monitoring ```java // Monitor crawling progress while (crawler.crawlerContext.getStatus() == CrawlerStatus.RUNNING) { int processed = dataService.getCount(sessionId); System.out.println("Processed: " + processed + " URLs"); Thread.sleep(5000); } ``` ## Contributing We welcome contributions to Fess Crawler! Please follow these guidelines:
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CrawlingParameterUtil.java
* This class provides methods to set and get various parameters related to the crawling process. * * <p>This class is final and cannot be instantiated.</p> * * <p>The following parameters are managed:</p> * <ul> * <li>{@link UrlQueue} - The queue of URLs to be crawled.</li> * <li>{@link CrawlerContext} - The context of the current crawling process.</li> * <li>{@link UrlQueueService} - The service for managing the URL queue.</li>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 6.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExtractException.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; /** * Exception thrown during the extraction process in the crawler. * This exception indicates a failure or error that occurred while extracting content from a crawled resource. * It extends {@link org.codelibs.fess.crawler.exception.CrawlerSystemException} and provides constructors
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/UrlQueue.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.entity; /** * The UrlQueue interface represents a queue of URLs to be processed by a web crawler. * It provides methods to get and set various properties of a URL queue entry. * * @param <IDTYPE> the type of the identifier for the URL queue entry */ public interface UrlQueue<IDTYPE> {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 4.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/CrawlingAccessException.java
*/ package org.codelibs.fess.crawler.exception; /** * CrawlingAccessException is an exception class that represents an issue encountered while accessing a resource during the crawling process. * It extends CrawlerSystemException and provides functionality to set and check the log level for the exception. * * <p>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java
/** * Current status of the crawler. */ protected volatile CrawlerStatus status = CrawlerStatus.INITIALIZING; /** * Filter for URLs to control which URLs are processed. */ protected UrlFilter urlFilter; /** * Manager for crawling rules and configurations. */ protected RuleManager ruleManager; /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 8.9K bytes - Viewed (0)