- Sort Score
- Result 10 results
- Languages All
Results 11 - 20 of 75 for sitemaps (0.1 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 6.3K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java
} /** * Test sitemaps add and remove operations */ public void test_sitemaps() { // Initial state assertNull(crawlerContext.removeSitemaps()); // Add sitemaps String[] sitemaps = new String[] { "http://example.com/sitemap.xml", "http://test.com/sitemap.xml" }; crawlerContext.addSitemaps(sitemaps); // Remove and verify
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 25.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java
assertFalse(robotsTxt.allows("/ddd", "Hoge Crawler")); String[] sitemaps = robotsTxt.getSitemaps(); assertEquals(2, sitemaps.length); assertEquals("http://www.example.com/sitmap.xml", sitemaps[0]); assertEquals("http://www.example.net/sitmap.xml", sitemaps[1]); } public void testParse_disable() {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 5.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java
* and processing of Sitemap data. * </p> * * <p> * The {@code loc} attribute specifies the URL of the Sitemap, while the {@code lastmod} attribute * indicates the last time the Sitemap file was modified. The {@code lastmod} attribute is used by crawlers * to incrementally fetch sitemaps that have been updated since a certain date. * </p> * * <p>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.4K bytes - Viewed (1) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/SitemapsRule.java
* represents a valid sitemap. It uses a SitemapsHelper to validate the response body as an InputStream. * The rule checks if the URL matches the defined regex pattern and then validates the content as a sitemap. * If any exception occurs during the sitemap validation, it logs the error and returns false. * */ public class SitemapsRule extends RegexRule { /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/test/resources/sitemaps/sitemap1.xml
<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <url> <loc>http://www.example.com/</loc> <lastmod>2005-01-01</lastmod> <changefreq>monthly</changefreq> <priority>0.8</priority> </url> <url> <loc>http://www.example.com/catalog?item=12&amp;desc=vacation_hawaii</loc> <changefreq>weekly</changefreq> </url> <url>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 915 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java
} /** * Adds sitemaps from robots.txt to the crawling queue. * @param urlQueue The URL queue to add sitemaps to. */ protected void addSitemapsFromRobotsTxt(final UrlQueue<?> urlQueue) { final String[] sitemaps = crawlerContext.removeSitemaps(); if (sitemaps != null) { for (final String childUrl : sitemaps) { try {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 20.4K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler.xml
<include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/> <!-- Crawler --> <component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" > </component> <!-- Crawler Thread -->
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Tue Nov 28 13:40:25 UTC 2017 - 1.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java
} /** * Adds a sitemap URL to the list of sitemaps. * * @param url The URL of the sitemap to be added */ public void addSitemap(final String url) { if (!sitemapList.contains(url)) { sitemapList.add(url); } } /** * Returns an array of sitemap URLs. * * @return an array of sitemap URLs */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10K bytes - Viewed (0) -
fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml
<include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/> <include path="crawler/opensearch.xml"/> <!-- Crawler --> <component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" > </component> <!-- Crawler Thread -->
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Nov 07 04:44:10 UTC 2024 - 2.2K bytes - Viewed (0)