Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 10 for urlFilters (0.08 seconds)

  1. src/main/java/org/codelibs/fess/opensearch/config/exentity/CrawlingConfig.java

                public static final String KEEP_ORIGINAL_BODY = "keep.original.body";
                public static final String CLEANUP_ALL = "cleanup.all";
                public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters";
                public static final String JCIFS_PREFIX = "jcifs.";
                public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sat Mar 15 06:53:53 GMT 2025
    - 5.6K bytes
    - Click Count (0)
  2. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

            crawler.crawlerContext.setMaxThreadCheckCount(3);
            crawler.crawlerContext.setMaxAccessCount(maxCount);
            crawler.crawlerContext.setNumOfThread(numOfThread);
            crawler.urlFilter.addInclude(url + ".*");
            crawler.urlFilter.addExclude(url + "/dir1/.*");
            final String sessionId = crawler.execute();
            assertEquals(maxCount, dataService.getCount(sessionId));
            dataService.delete(sessionId);
        }
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Jan 15 01:11:43 GMT 2026
    - 13.1K bytes
    - Click Count (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

        /**
         * Returns the URL filter.
         * @return The UrlFilter instance.
         */
        public UrlFilter getUrlFilter() {
            return urlFilter;
        }
    
        /**
         * Sets the URL filter.
         * @param urlFilter The UrlFilter instance to set.
         */
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        /**
         * Returns the rule manager.
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Mon Nov 24 03:59:47 GMT 2025
    - 17K bytes
    - Click Count (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        }
    
        /**
         * Returns the URL filter.
         * @return The UrlFilter.
         */
        public UrlFilter getUrlFilter() {
            return urlFilter;
        }
    
        /**
         * Sets the URL filter.
         * @param urlFilter The UrlFilter.
         */
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        /**
         * Returns the rule manager.
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Wed Dec 31 09:07:38 GMT 2025
    - 9K bytes
    - Click Count (0)
  5. README.md

    crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second
    ```
    
    ### URL Filtering
    
    ```java
    // Include patterns
    crawler.urlFilter.addInclude("https://example.com/.*");
    crawler.urlFilter.addInclude(".*\\.pdf$");
    
    // Exclude patterns  
    crawler.urlFilter.addExclude(".*\\.js$");
    crawler.urlFilter.addExclude(".*login.*");
    ```
    
    ## Supported Protocols and Formats
    
    ### Protocols
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Sun Aug 31 05:32:52 GMT 2025
    - 15.3K bytes
    - Click Count (0)
  6. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler.addUrl(url);
                crawler.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler.getCrawlerContext().setNumOfThread(numOfThread);
                crawler.urlFilter.addInclude(url + ".*");
                final String sessionId = crawler.execute();
                assertEquals(maxCount, dataService.getCount(sessionId));
                dataService.delete(sessionId);
            } finally {
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Jan 15 01:11:43 GMT 2026
    - 7.8K bytes
    - Click Count (0)
  7. src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

                    try {
                        urlFilterService.delete(sid);
                    } catch (final Exception e) {
                        logger.warn("Failed to delete UrlFilter: sessionId={}", sid);
                    }
                }
    
                final DuplicateHostHelper duplicateHostHelper = ComponentUtil.getDuplicateHostHelper();
    
                // set urls
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 25K bytes
    - Click Count (0)
  8. fess-crawler-lasta/src/main/resources/crawler/filter.xml

    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    
    	<component name="urlFilter"
    		class="org.codelibs.fess.crawler.filter.impl.UrlFilterImpl" instance="prototype">
    	</component>
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Sun Oct 11 02:16:55 GMT 2015
    - 364 bytes
    - Click Count (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

            final Set<String> urlSet = new HashSet<>();
            final List<UrlQueue<?>> childList = childUrlList.stream()
                    .filter(d -> StringUtil.isNotBlank(d.getUrl()) && urlSet.add(d.getUrl()) && crawlerContext.urlFilter.match(d.getUrl()))
                    .map(d -> {
                        final UrlQueue<?> uq = crawlerContainer.getComponent("urlQueue");
                        uq.setCreateTime(SystemUtil.currentTimeMillis());
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Wed Dec 31 09:07:38 GMT 2025
    - 20.3K bytes
    - Click Count (0)
  10. src/main/java/org/codelibs/fess/indexer/IndexUpdater.java

         */
        private void deleteBySessionId(final String sessionId) {
            try {
                urlFilterService.delete(sessionId);
            } catch (final Exception e) {
                logger.warn("Failed to delete UrlFilter: sessionId={}", sessionId, e);
            }
            try {
                urlQueueService.delete(sessionId);
            } catch (final Exception e) {
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 32.9K bytes
    - Click Count (0)
Back to Top