Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 17 for prefilter (0.05 sec)

  1. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

            crawler.crawlerContext.setMaxThreadCheckCount(3);
            crawler.crawlerContext.setMaxAccessCount(maxCount);
            crawler.crawlerContext.setNumOfThread(numOfThread);
            crawler.urlFilter.addInclude(url + ".*");
            crawler.urlFilter.addExclude(url + "/dir1/.*");
            final String sessionId = crawler.execute();
            assertEquals(maxCount, dataService.getCount(sessionId));
            dataService.delete(sessionId);
        }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            String sessionId = "test-session-006";
            urlFilter.init(sessionId);
    
            urlFilter.addExclude(".*\\.(css|js)$");
            urlFilter.addExclude(".*\\/admin\\/.*");
            urlFilter.addExclude(".*#.*");
    
            assertTrue(urlFilter.match("https://example.com/page.html"));
            assertFalse(urlFilter.match("https://example.com/style.css"));
            assertFalse(urlFilter.match("https://example.com/script.js"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

        /**
         * Returns the URL filter.
         * @return The UrlFilter instance.
         */
        public UrlFilter getUrlFilter() {
            return urlFilter;
        }
    
        /**
         * Sets the URL filter.
         * @param urlFilter The UrlFilter instance to set.
         */
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        /**
         * Returns the rule manager.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

            crawler.crawlerContext.setMaxThreadCheckCount(3);
            crawler.crawlerContext.setMaxAccessCount(maxCount);
            crawler.crawlerContext.setNumOfThread(numOfThread);
            crawler.urlFilter.addInclude(url + ".*");
            crawler.urlFilter.addExclude(url + "/dir1/.*");
            final String sessionId = crawler.execute();
            assertEquals(maxCount, dataService.getCount(sessionId));
            dataService.delete(sessionId);
        }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 19.1K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java

                    .singleton("urlFilter", UrlFilterImpl.class)//
                    .singleton("robotsTxtHelper", RobotsTxtHelper.class)//
                    .singleton("httpClient", HcHttpClient.class);
            httpClient = container.getComponent("httpClient");
            urlFilter = container.getComponent("urlFilter");
        }
    
        public void test_doGet() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  6. README.md

    crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second
    ```
    
    ### URL Filtering
    
    ```java
    // Include patterns
    crawler.urlFilter.addInclude("https://example.com/.*");
    crawler.urlFilter.addInclude(".*\\.pdf$");
    
    // Exclude patterns  
    crawler.urlFilter.addExclude(".*\\.js$");
    crawler.urlFilter.addExclude(".*login.*");
    ```
    
    ## Supported Protocols and Formats
    
    ### Protocols
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  7. .teamcity/src/main/kotlin/projects/StageProject.kt

                        "Flame graphs with $profiler for ${performanceScenario.scenario.scenario} | ${performanceScenario.testProject} " +
                            "on ${os.asName()} (bucket $bucketIndex)",
                    performanceSubProject = "performance",
                    bucketIndex = bucketIndex,
                    extraParameters =
    Registered: Wed Sep 10 11:36:15 UTC 2025
    - Last Modified: Tue Jul 29 03:24:58 UTC 2025
    - 11.9K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

        protected void setUp() throws Exception {
            super.setUp();
            crawlerContext = new CrawlerContext();
        }
    
        /**
         * Test implementation of UrlFilter for testing
         */
        private static class TestUrlFilter implements UrlFilter {
            @Override
            public void init(String sessionId) {
            }
    
            @Override
            public void addInclude(String urlPattern) {
            }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 25.6K bytes
    - Viewed (0)
  9. cmd/utils.go

    	runtime.SetBlockProfileRate(0)     // Disable until needed
    }
    
    // Starts a profiler returns nil if profiler is not enabled, caller needs to handle this.
    func startProfiler(profilerType string) (minioProfiler, error) {
    	var prof profilerWrapper
    	prof.ext = "pprof"
    	// Enable profiler and set the name of the file that pkg/pprof
    	// library creates to store profiling data.
    Registered: Sun Sep 07 19:28:11 UTC 2025
    - Last Modified: Fri Aug 29 02:39:48 UTC 2025
    - 33K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

            final Set<String> urlSet = new HashSet<>();
            final List<UrlQueue<?>> childList = childUrlList.stream()
                    .filter(d -> StringUtil.isNotBlank(d.getUrl()) && urlSet.add(d.getUrl()) && crawlerContext.urlFilter.match(d.getUrl()))
                    .map(d -> {
                        final UrlQueue<?> uq = crawlerContainer.getComponent("urlQueue");
                        uq.setCreateTime(SystemUtil.currentTimeMillis());
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
Back to top