Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 19 for urlFilters (0.81 sec)

  1. src/main/java/org/codelibs/fess/opensearch/config/exentity/CrawlingConfig.java

                public static final String KEEP_ORIGINAL_BODY = "keep.original.body";
                public static final String CLEANUP_ALL = "cleanup.all";
                public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters";
                public static final String JCIFS_PREFIX = "jcifs.";
                public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Mar 15 06:53:53 UTC 2025
    - 5.6K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.filter;
    
    /**
     * UrlFilter checks if a given url is a target one.
     *
     * @author shinsuke
     *
     */
    public interface UrlFilter {
    
        /**
         * Initializes this URL filter for the given session ID.
         *
         * @param sessionId the session ID to initialize the filter with
         */
        void init(String sessionId);
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.6K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

            crawler.addExcludeFilter("http://example\\.com/exclude/.*");
    
            // Initialize the filter
            crawler.urlFilter.init(crawler.getSessionId());
    
            assertTrue(crawler.urlFilter.match("http://example.com/page"));
            assertFalse(crawler.urlFilter.match("http://example.com/exclude/page"));
        }
    
        public void test_daemon_mode() throws Exception {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Tue Nov 11 13:40:14 UTC 2025
    - 25.8K bytes
    - Viewed (0)
  4. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

            crawler.crawlerContext.setMaxThreadCheckCount(3);
            crawler.crawlerContext.setMaxAccessCount(maxCount);
            crawler.crawlerContext.setNumOfThread(numOfThread);
            crawler.urlFilter.addInclude(url + ".*");
            crawler.urlFilter.addExclude(url + "/dir1/.*");
            final String sessionId = crawler.execute();
            assertEquals(maxCount, dataService.getCount(sessionId));
            dataService.delete(sessionId);
        }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImplTest.java

            assertEquals(0, urlFilter.cachedIncludeSet.size());
            assertEquals(0, urlFilter.cachedExcludeSet.size());
    
            urlFilter.addExclude(".*[test.*");
    
            assertEquals(0, urlFilter.cachedIncludeSet.size());
            assertEquals(0, urlFilter.cachedExcludeSet.size());
    
            final String sessionId = "id1";
            urlFilter.init(sessionId);
            assertEquals(0, urlFilter.cachedIncludeSet.size());
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.9K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            String sessionId = "test-session-006";
            urlFilter.init(sessionId);
    
            urlFilter.addExclude(".*\\.(css|js)$");
            urlFilter.addExclude(".*\\/admin\\/.*");
            urlFilter.addExclude(".*#.*");
    
            assertTrue(urlFilter.match("https://example.com/page.html"));
            assertFalse(urlFilter.match("https://example.com/style.css"));
            assertFalse(urlFilter.match("https://example.com/script.js"));
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

        /**
         * Returns the URL filter currently held in the {@code urlFilter} field.
         *
         * @return the {@link UrlFilter} instance stored on this object
         */
        public UrlFilter getUrlFilter() {
            return urlFilter;
        }
    
        /**
         * Sets the URL filter, replacing whatever value the {@code urlFilter} field held before.
         * The argument is stored as-is; no validation is performed in this method.
         *
         * @param urlFilter the {@link UrlFilter} instance to store
         */
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        /**
         * Returns the rule manager.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 17K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        }
    
        /**
         * Returns the URL filter currently held in the {@code urlFilter} field.
         *
         * @return the {@link UrlFilter} stored on this object
         */
        public UrlFilter getUrlFilter() {
            return urlFilter;
        }
    
        /**
         * Sets the URL filter, replacing whatever value the {@code urlFilter} field held before.
         * The argument is stored as-is; no validation is performed in this method.
         *
         * @param urlFilter the {@link UrlFilter} to store
         */
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        /**
         * Returns the rule manager.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerThreadTest.java

            crawlerContainer = mock(CrawlerContainer.class);
            logHelper = mock(LogHelper.class);
            clientFactory = mock(CrawlerClientFactory.class);
            urlFilter = mock(UrlFilter.class);
            ruleManager = mock(RuleManager.class);
    
            crawlerContext.urlFilter = urlFilter;
            crawlerContext.ruleManager = ruleManager;
    
            crawlerThread.urlQueueService = urlQueueService;
            crawlerThread.dataService = dataService;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 18.3K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImpl.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.filter.UrlFilter;
    import org.codelibs.fess.crawler.service.UrlFilterService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlFilter} interface.
     * This class provides functionality to filter URLs based on include and exclude patterns.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.2K bytes
    - Viewed (0)
Back to top