Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 11 - 20 of 75 for sitemaps (0.04 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                robotTxtFile.deleteOnExit();
    
                // sitemaps.xml
                buf = new StringBuilder();
                buf.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>").append('\n');
                buf.append("<urlset ").append("xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">").append('\n');
                buf.append("<url>").append('\n');
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.3K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

        }
    
        /**
         * Test sitemaps add and remove operations
         */
        public void test_sitemaps() {
            // Initial state
            assertNull(crawlerContext.removeSitemaps());
    
            // Add sitemaps
            String[] sitemaps = new String[] { "http://example.com/sitemap.xml", "http://test.com/sitemap.xml" };
            crawlerContext.addSitemaps(sitemaps);
    
            // Remove and verify
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 25.6K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            assertFalse(robotsTxt.allows("/ddd", "Hoge Crawler"));
    
            String[] sitemaps = robotsTxt.getSitemaps();
            assertEquals(2, sitemaps.length);
            assertEquals("http://www.example.com/sitmap.xml", sitemaps[0]);
            assertEquals("http://www.example.net/sitmap.xml", sitemaps[1]);
    
        }
    
        public void testParse_disable() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 5.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java

     * and processing of Sitemap data.
     * </p>
     *
     * <p>
     * The {@code loc} attribute specifies the URL of the Sitemap, while the {@code lastmod} attribute
     * indicates the last time the Sitemap file was modified.  The {@code lastmod} attribute is used by crawlers
     * to incrementally fetch sitemaps that have been updated since a certain date.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.4K bytes
    - Viewed (1)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/SitemapsRule.java

     * represents a valid sitemap. It uses a SitemapsHelper to validate the response body as an InputStream.
     * The rule checks if the URL matches the defined regex pattern and then validates the content as a sitemap.
     * If any exception occurs during the sitemap validation, it logs the error and returns false.
     *
     */
    public class SitemapsRule extends RegexRule {
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  6. fess-crawler/src/test/resources/sitemaps/sitemap1.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
       <url>
          <loc>http://www.example.com/</loc>
          <lastmod>2005-01-01</lastmod>
          <changefreq>monthly</changefreq>
          <priority>0.8</priority>
       </url>
       <url>
          <loc>http://www.example.com/catalog?item=12&amp;desc=vacation_hawaii</loc>
          <changefreq>weekly</changefreq>
       </url>
       <url>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 915 bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

        }
    
        /**
         * Adds sitemaps from robots.txt to the crawling queue.
         * @param urlQueue The URL queue to add sitemaps to.
         */
        protected void addSitemapsFromRobotsTxt(final UrlQueue<?> urlQueue) {
            final String[] sitemaps = crawlerContext.removeSitemaps();
            if (sitemaps != null) {
                for (final String childUrl : sitemaps) {
                    try {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
  8. fess-crawler-lasta/src/main/resources/crawler.xml

    	<include path="crawler/extractor.xml"/>
    	<include path="crawler/mimetype.xml"/>
    	<include path="crawler/encoding.xml"/>
    	<include path="crawler/urlconverter.xml"/>
    	<include path="crawler/log.xml"/>
    	<include path="crawler/sitemaps.xml"/>
    
    	<!-- Crawler -->
    	<component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" >
    	</component>
    
    	<!-- Crawler Thread -->
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Tue Nov 28 13:40:25 UTC 2017
    - 1.7K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

        }
    
        /**
         * Adds a sitemap URL to the list of sitemaps.
         *
         * @param url The URL of the sitemap to be added
         */
        public void addSitemap(final String url) {
            if (!sitemapList.contains(url)) {
                sitemapList.add(url);
            }
        }
    
        /**
         * Returns an array of sitemap URLs.
         *
         * @return an array of sitemap URLs
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  10. fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml

        <include path="crawler/mimetype.xml"/>
        <include path="crawler/encoding.xml"/>
        <include path="crawler/urlconverter.xml"/>
        <include path="crawler/log.xml"/>
        <include path="crawler/sitemaps.xml"/>
    
        <include path="crawler/opensearch.xml"/>
    
    	<!-- Crawler -->
    	<component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" >
    	</component>
    
    	<!-- Crawler Thread -->
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Nov 07 04:44:10 UTC 2024
    - 2.2K bytes
    - Viewed (0)
Back to top