- Sort Score
- Num 10 results
- Language All
Results 1 - 10 of 10 for urlFilters (0.08 seconds)
-
src/main/java/org/codelibs/fess/opensearch/config/exentity/CrawlingConfig.java
public static final String KEEP_ORIGINAL_BODY = "keep.original.body"; public static final String CLEANUP_ALL = "cleanup.all"; public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters"; public static final String JCIFS_PREFIX = "jcifs."; public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Sat Mar 15 06:53:53 GMT 2025 - 5.6K bytes - Click Count (0) -
fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
crawler.crawlerContext.setMaxThreadCheckCount(3); crawler.crawlerContext.setMaxAccessCount(maxCount); crawler.crawlerContext.setNumOfThread(numOfThread); crawler.urlFilter.addInclude(url + ".*"); crawler.urlFilter.addExclude(url + "/dir1/.*"); final String sessionId = crawler.execute(); assertEquals(maxCount, dataService.getCount(sessionId)); dataService.delete(sessionId); }
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Jan 15 01:11:43 GMT 2026 - 13.1K bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java
/** * Returns the URL filter. * @return The UrlFilter instance. */ public UrlFilter getUrlFilter() { return urlFilter; } /** * Sets the URL filter. * @param urlFilter The UrlFilter instance to set. */ public void setUrlFilter(final UrlFilter urlFilter) { this.urlFilter = urlFilter; } /** * Returns the rule manager.
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Mon Nov 24 03:59:47 GMT 2025 - 17K bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java
} /** * Returns the URL filter. * @return The UrlFilter. */ public UrlFilter getUrlFilter() { return urlFilter; } /** * Sets the URL filter. * @param urlFilter The UrlFilter. */ public void setUrlFilter(final UrlFilter urlFilter) { this.urlFilter = urlFilter; } /** * Returns the rule manager.
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Wed Dec 31 09:07:38 GMT 2025 - 9K bytes - Click Count (0) -
README.md
crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second ``` ### URL Filtering ```java // Include patterns crawler.urlFilter.addInclude("https://example.com/.*"); crawler.urlFilter.addInclude(".*\\.pdf$"); // Exclude patterns crawler.urlFilter.addExclude(".*\\.js$"); crawler.urlFilter.addExclude(".*login.*"); ``` ## Supported Protocols and Formats ### Protocols
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Sun Aug 31 05:32:52 GMT 2025 - 15.3K bytes - Click Count (0) -
fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
crawler.addUrl(url); crawler.getCrawlerContext().setMaxAccessCount(maxCount); crawler.getCrawlerContext().setNumOfThread(numOfThread); crawler.urlFilter.addInclude(url + ".*"); final String sessionId = crawler.execute(); assertEquals(maxCount, dataService.getCount(sessionId)); dataService.delete(sessionId); } finally {
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Jan 15 01:11:43 GMT 2026 - 7.8K bytes - Click Count (0) -
src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
try { urlFilterService.delete(sid); } catch (final Exception e) { logger.warn("Failed to delete UrlFilter: sessionId={}", sid); } } final DuplicateHostHelper duplicateHostHelper = ComponentUtil.getDuplicateHostHelper(); // set urls
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Fri Nov 28 16:29:12 GMT 2025 - 25K bytes - Click Count (0) -
fess-crawler-lasta/src/main/resources/crawler/filter.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <component name="urlFilter" class="org.codelibs.fess.crawler.filter.impl.UrlFilterImpl" instance="prototype"> </component>
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Sun Oct 11 02:16:55 GMT 2015 - 364 bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java
final Set<String> urlSet = new HashSet<>(); final List<UrlQueue<?>> childList = childUrlList.stream() .filter(d -> StringUtil.isNotBlank(d.getUrl()) && urlSet.add(d.getUrl()) && crawlerContext.urlFilter.match(d.getUrl())) .map(d -> { final UrlQueue<?> uq = crawlerContainer.getComponent("urlQueue"); uq.setCreateTime(SystemUtil.currentTimeMillis());
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Wed Dec 31 09:07:38 GMT 2025 - 20.3K bytes - Click Count (0) -
src/main/java/org/codelibs/fess/indexer/IndexUpdater.java
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Fri Nov 28 16:29:12 GMT 2025 - 32.9K bytes - Click Count (0)