Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 9 of 9 for urlFilter (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.filter;
    
    /**
     * UrlFilter checks if a given url is a target one.
     *
     * @author shinsuke
     *
     */
    public interface UrlFilter {
    
        /**
         * Initialize a url filter by sessionId.
         *
         * @param sessionId Session ID
         */
        void init(String sessionId);
    
        /**
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.6K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImplTest.java

            assertEquals(0, urlFilter.cachedIncludeSet.size());
            assertEquals(0, urlFilter.cachedExcludeSet.size());
    
            urlFilter.addExclude(".*[test.*");
    
            assertEquals(0, urlFilter.cachedIncludeSet.size());
            assertEquals(0, urlFilter.cachedExcludeSet.size());
    
            final String sessionId = "id1";
            urlFilter.init(sessionId);
            assertEquals(0, urlFilter.cachedIncludeSet.size());
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 6.9K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java

                    .singleton("urlFilter", UrlFilterImpl.class)//
                    .singleton("robotsTxtHelper", RobotsTxtHelper.class)//
                    .singleton("httpClient", HcHttpClient.class);
            httpClient = container.getComponent("httpClient");
            urlFilter = container.getComponent("urlFilter");
        }
    
        public void test_doGet() {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu May 09 09:28:25 UTC 2024
    - 8.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

        }
    
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        public RuleManager getRuleManager() {
            return ruleManager;
        }
    
        public void setRuleManager(final RuleManager ruleManager) {
            this.ruleManager = ruleManager;
        }
    
        public IntervalController getIntervalController() {
            return intervalController;
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 7.9K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImpl.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.filter.UrlFilter;
    import org.codelibs.fess.crawler.service.UrlFilterService;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import jakarta.annotation.Resource;
    
    /**
     * @author shinsuke
     *
     */
    public class UrlFilterImpl implements UrlFilter {
    
        private static final Logger logger = LoggerFactory.getLogger(UrlFilterImpl.class);
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 7.3K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

            return status;
        }
    
        public void setStatus(final CrawlerStatus status) {
            this.status = status;
        }
    
        public UrlFilter getUrlFilter() {
            return urlFilter;
        }
    
        public void setUrlFilter(final UrlFilter urlFilter) {
            this.urlFilter = urlFilter;
        }
    
        public RuleManager getRuleManager() {
            return ruleManager;
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Sat Oct 12 01:40:57 UTC 2024
    - 4.5K bytes
    - Viewed (0)
  7. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler.addUrl(url);
                crawler.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler.getCrawlerContext().setNumOfThread(numOfThread);
                crawler.urlFilter.addInclude(url + ".*");
                final String sessionId = crawler.execute();
                assertEquals(maxCount, dataService.getCount(sessionId));
                dataService.delete(sessionId);
            } finally {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Nov 07 04:44:10 UTC 2024
    - 7.7K bytes
    - Viewed (0)
  8. fess-crawler-lasta/src/main/resources/crawler/filter.xml

    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    
    	<component name="urlFilter"
    		class="org.codelibs.fess.crawler.filter.impl.UrlFilterImpl" instance="prototype">
    	</component>
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 364 bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfig.java

                public static final String KEEP_ORIGINAL_BODY = "keep.original.body";
                public static final String CLEANUP_ALL = "cleanup.all";
                public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters";
                public static final String JCIFS_PREFIX = "jcifs.";
                public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Thu Oct 24 13:01:38 UTC 2024
    - 5.5K bytes
    - Viewed (0)
Back to top