Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 10 for SitemapsHelper (0.09 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/SitemapsHelper.java

     * and handles potential exceptions during parsing.
     * The class also includes inner classes for handling XML sitemap and sitemap index parsing.
     */
    public class SitemapsHelper {
        private static final Logger logger = LogManager.getLogger(SitemapsHelper.class);
    
        /** The size of the preload buffer for checking file format. */
        protected int preloadSize = 512;
    
        /** Enable validation of sitemap entries. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 13:19:40 UTC 2025
    - 34.9K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessorTest.java

            SitemapSet sitemapSet = new SitemapSet();
            sitemapSet.addSitemap(sitemap);
    
            when(crawlerContainer.getComponent("sitemapsHelper")).thenReturn(sitemapsHelper);
            try {
                when(sitemapsHelper.parse(any(InputStream.class))).thenReturn(sitemapSet);
            } catch (Exception e) {
                fail("Should not throw exception in test setup");
            }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 13:29:22 UTC 2025
    - 12K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

         */
        @Override
        public void process(final ResponseData responseData) {
            final SitemapsHelper sitemapsHelper = crawlerContainer.getComponent("sitemapsHelper");
            try (final InputStream responseBody = responseData.getResponseBody()) {
                final SitemapSet sitemapSet = sitemapsHelper.parse(responseBody);
                final Set<RequestData> requestDataSet = new LinkedHashSet<>();
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/SitemapsRuleTest.java

        @Override
        protected void setUp() throws Exception {
            super.setUp();
            StandardCrawlerContainer container = new StandardCrawlerContainer().singleton("sitemapsHelper", SitemapsHelper.class)//
                    .singleton("sitemapsRule", SitemapsRule.class);
            sitemapsRule = container.getComponent("sitemapsRule");
        }
    
        public void test_match() {
            assertMatchTrue(getTestData1_OK());
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 4.7K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/SitemapsHelperTest.java

            assertFalse(sitemapsHelper.isValidPriority("-0.1"));
            assertFalse(sitemapsHelper.isValidPriority("1.1"));
            assertFalse(sitemapsHelper.isValidPriority("abc"));
    
            // Valid changefreq
            assertTrue(sitemapsHelper.isValidChangefreq("always"));
            assertTrue(sitemapsHelper.isValidChangefreq("hourly"));
            assertTrue(sitemapsHelper.isValidChangefreq("daily"));
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 36.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/SitemapsRule.java

            if (super.match(responseData)) {
                try (final InputStream is = responseData.getResponseBody()) {
                    final SitemapsHelper sitemapsHelper = crawlerContainer.getComponent("sitemapsHelper");
                    return sitemapsHelper.isValid(is);
                } catch (final CrawlingAccessException e) {
                    throw e;
                } catch (final Exception e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  7. fess-crawler-lasta/src/main/resources/crawler/sitemaps.xml

    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    
    	<component name="sitemapsHelper" class="org.codelibs.fess.crawler.helper.SitemapsHelper"
    		instance="singleton">
    	</component>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 365 bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/RuleManagerImplTest.java

        public RuleManager ruleManager;
    
        @Override
        protected void setUp() throws Exception {
            super.setUp();
    
            StandardCrawlerContainer container = new StandardCrawlerContainer().singleton("sitemapsHelper", SitemapsHelper.class)//
                    .singleton("sitemapsRule", SitemapsRule.class)//
                    .singleton("fileRule", RegexRule.class)//
                    .singleton("ruleManager", RuleManagerImpl.class);
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.2K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                    .singleton("urlConvertHelper", UrlConvertHelper.class)
                    .singleton("intervalController", DefaultIntervalController.class)
                    .singleton("sitemapsHelper", SitemapsHelper.class)
                    .singleton("logHelper", LogHelperImpl.class)
                    .singleton("encodingHelper", EncodingHelper.class)
                    .singleton("contentLengthHelper", ContentLengthHelper.class)
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Tue Nov 11 13:40:14 UTC 2025
    - 25.8K bytes
    - Viewed (0)
  10. CLAUDE.md

    extractorFactory.addExtractor("text/html", tikaExtractor, 1);  // Fallback
    ```
    
    ### Helpers
    
    **RobotsTxtHelper**: RFC 9309 parsing, user-agent matching, crawl-delay, sitemaps
    **SitemapsHelper**: Sitemap XML parsing, index handling
    **MimeTypeHelper**: MIME detection via Tika
    **EncodingHelper**: Charset detection with BOM
    **UrlConvertHelper**: URL normalization
    
    ---
    
    ## Development Workflow
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
Back to top