- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 233 for crawling (0.06 sec)
-
README.md
// Configure crawling parameters crawler.addUrl("https://example.com"); crawler.crawlerContext.setMaxAccessCount(100); crawler.crawlerContext.setNumOfThread(5); crawler.urlFilter.addInclude("https://example.com/.*"); // Execute crawling String sessionId = crawler.execute(); System.out.println("Crawling completed. Session ID: " + sessionId); } }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/LogHelperImpl.java
if (e.isDebugEnabled()) { logger.debug("Crawling Access Exception at {}", urlQueue.getUrl(), e); } else if (e.isInfoEnabled()) { logger.info(e.getMessage()); } else if (e.isWarnEnabled()) { logger.warn("Crawling Access Exception at " + urlQueue.getUrl(), e); } else if (e.isErrorEnabled()) { logger.error("Crawling Access Exception at " + urlQueue.getUrl(), e); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java
} /** * Stores crawling information and parameters for the specified session. * Creates a new crawling info record if none exists or if create flag is true. * Also stores any accumulated information parameters and clears the info map. * * @param sessionId the session ID for the crawling information * @param create if true, creates a new crawling info regardless of existing records
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 15.2K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
/** * Helper class for managing data crawling operations in Fess. * This class coordinates the execution of data store crawling processes, * managing multiple concurrent crawling threads and handling the indexing * of crawled documents into the search engine. * * <p>The DataIndexHelper supports:</p> * <ul> * <li>Concurrent crawling of multiple data configurations</li>
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 18.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java
import org.codelibs.fess.crawler.service.UrlQueueService; import jakarta.annotation.Resource; /** * The Crawler class is the main class for web crawling. It manages the crawling process, * including adding URLs to the queue, filtering URLs, managing crawler threads, * and handling the overall crawling lifecycle. * * <p>It implements the Runnable interface to be executed in a separate thread,
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/app/service/CrawlingInfoService.java
} /** * Stores (inserts or updates) a crawling information record. * Sets up the store conditions including creation time if not already set, * then performs an insert or update operation with immediate refresh. * * @param crawlingInfo the crawling information entity to store * @throws FessSystemException if the crawling information is null */
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 19.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/exec/Crawler.java
* <ul> * <li>Web crawling - crawls web sites and web content</li> * <li>File system crawling - crawls file systems and documents</li> * <li>Data store crawling - crawls databases and other data sources</li> * <li>Combined crawling - runs multiple crawling types simultaneously</li> * </ul> * * <p>Command line usage: * <pre> * java org.codelibs.fess.exec.Crawler [options...]
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 31K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/CrawlingConfigHelper.java
import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import jakarta.annotation.PostConstruct; /** * Helper class for managing crawling configurations. * Provides functionality to store, retrieve, and manage different types of crawling configurations * including web, file, and data configurations. Supports caching and session-based configuration management. */ public class CrawlingConfigHelper {
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 19.5K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
if (logger.isInfoEnabled()) { logger.info("No crawling target urls."); } return; } doCrawl(sessionId, webConfigList, fileConfigList); } /** * Performs the actual crawling operation for the provided configurations. * * @param sessionId The session ID for this crawling operation * @param webConfigList List of web configurations to crawl
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 24.9K bytes - Viewed (0) -
src/main/resources/fess_label_en.properties
labels.createdTime=Created Time labels.depth=Depth labels.excludedPaths=Excluded Paths for Crawling labels.excludedUrls=Excluded URLs for Crawling labels.excludedDocPaths=Excluded Paths for Searching labels.excludedDocUrls=Excluded URLs for Searching labels.hostname=Hostname labels.id=ID labels.includedPaths=Included Paths for Crawling labels.includedUrls=Included URLs for Crawling labels.includedDocPaths=Included Paths for Searching
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 10 04:56:21 UTC 2025 - 40.7K bytes - Viewed (0)