- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 11 for addInclude (0.06 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImplTest.java
assertFalse(urlFilter.match("http://test.com/a")); } public void test_match_include_case2() { urlFilter.addInclude("http://example.com/.*"); urlFilter.addInclude("http://test.com/.*"); final String sessionId = "id1"; urlFilter.init(sessionId); assertTrue(urlFilter.match("http://example.com/"));
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 6.9K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java
// Test empty pattern urlFilter.addInclude(""); urlFilter.addExclude(""); // Test single character pattern urlFilter.addInclude("."); urlFilter.addExclude("*"); // Test patterns with only special characters urlFilter.addInclude("^$"); urlFilter.addExclude(".*"); // Should handle boundary conditions gracefully
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Wed Sep 03 14:42:53 UTC 2025 - 19K bytes - Viewed (0) -
fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
crawler.crawlerContext.setMaxAccessCount(maxCount); crawler.crawlerContext.setNumOfThread(numOfThread); crawler.urlFilter.addInclude(url + ".*"); crawler.urlFilter.addExclude(url + "/dir1/.*"); final String sessionId = crawler.execute(); assertEquals(maxCount, dataService.getCount(sessionId)); dataService.delete(sessionId); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
crawler.crawlerContext.setMaxAccessCount(maxCount); crawler.crawlerContext.setNumOfThread(numOfThread); crawler.urlFilter.addInclude(url + ".*"); crawler.urlFilter.addExclude(url + "/dir1/.*"); final String sessionId = crawler.execute(); assertEquals(maxCount, dataService.getCount(sessionId)); dataService.delete(sessionId); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 19.1K bytes - Viewed (0) -
README.md
``` ### URL Filtering ```java // Include patterns crawler.urlFilter.addInclude("https://example.com/.*"); crawler.urlFilter.addInclude(".*\\.pdf$"); // Exclude patterns crawler.urlFilter.addExclude(".*\\.js$"); crawler.urlFilter.addExclude(".*login.*"); ``` ## Supported Protocols and Formats ### Protocols
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImpl.java
} } /* * (non-Javadoc) * * @see org.codelibs.fess.crawler.filter.UrlFilter#addInclude(java.lang.String) */ @Override public void addInclude(final String urlPattern) { try { Pattern.compile(urlPattern); } catch (final Exception e) { if (logger.isWarnEnabled()) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 9.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java
* Add an url pattern as a target. * * @param urlPattern Regular expression that is crawled */ void addInclude(String urlPattern); /** * Add an url pattern as a non-target. * * @param urlPattern Regular expression that is not crawled */ void addExclude(String urlPattern); /** * Process an url when it's added as a seed url. * * @param url URL */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
crawler.addUrl(url); crawler.getCrawlerContext().setMaxAccessCount(maxCount); crawler.getCrawlerContext().setNumOfThread(numOfThread); crawler.urlFilter.addInclude(url + ".*"); final String sessionId = crawler.execute(); assertEquals(maxCount, dataService.getCount(sessionId)); dataService.delete(sessionId); } finally {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 7.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java
* @param regexp The regular expression for the include filter. */ public void addIncludeFilter(final String regexp) { if (StringUtil.isNotBlank(regexp)) { urlFilter.addInclude(regexp); } } /** * Adds an exclude filter for URLs. * URLs matching this regular expression will not be crawled. * @param regexp The regular expression for the exclude filter.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 25.6K bytes - Viewed (0)