- Sort Score
- Num 10 results
- Language All
Results 1 - 10 of 56 for sitemaps (0.05 seconds)
-
fess-crawler-lasta/src/main/resources/crawler/sitemaps.xml
Shinsuke Sugaya <******@****.***> 1444529815 +0900
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Sun Oct 11 02:16:55 GMT 2015 - 365 bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java
} /** * Adds sitemaps to the thread-local storage. * @param sitemaps An array of sitemap URLs. */ public void addSitemaps(final String[] sitemaps) { sitemapsLocal.set(sitemaps); } /** * Removes sitemaps from the thread-local storage and returns them. * @return An array of sitemap URLs, or null if none were present. */Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Wed Dec 31 09:07:38 GMT 2025 - 9K bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java
} /** * Adds sitemaps from robots.txt to the crawling queue. * @param urlQueue The URL queue to add sitemaps to. */ protected void addSitemapsFromRobotsTxt(final UrlQueue<?> urlQueue) { final String[] sitemaps = crawlerContext.removeSitemaps(); if (sitemaps != null) { for (final String childUrl : sitemaps) { try {Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Wed Dec 31 09:07:38 GMT 2025 - 20.3K bytes - Click Count (0) -
fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Jan 15 01:11:43 GMT 2026 - 8.1K bytes - Click Count (0) -
CLAUDE.md
### Helpers - **RobotsTxtHelper**: RFC 9309 parsing, user-agent matching, crawl-delay, sitemaps - **SitemapsHelper**: Sitemap XML parsing, index handling - **MimeTypeHelper**: MIME detection via Tika - **EncodingHelper**: Charset detection with BOM - **UrlConvertHelper**: URL normalization
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Mar 12 03:39:20 GMT 2026 - 8.1K bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/Hc4HttpClient.java
if (robotsTxt != null) { final String[] sitemaps = robotsTxt.getSitemaps(); if (sitemaps.length > 0) { crawlerContext.addSitemaps(sitemaps); } final RobotsTxt.Directive directive = robotsTxt.getMatchedDirective(userAgent);
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Fri Jan 09 23:46:52 GMT 2026 - 54.4K bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/Hc5HttpClient.java
if (robotsTxt != null) { final String[] sitemaps = robotsTxt.getSitemaps(); if (sitemaps.length > 0) { crawlerContext.addSitemaps(sitemaps); } final RobotsTxt.Directive directive = robotsTxt.getMatchedDirective(userAgent);
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Sat Jan 31 12:23:29 GMT 2026 - 62.2K bytes - Click Count (0) -
fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml
<include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/> <include path="crawler/opensearch.xml"/> <!-- Crawler --> <component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" > </component> <!-- Crawler Thread -->
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Nov 07 04:44:10 GMT 2024 - 2.2K bytes - Click Count (0) -
fess-crawler-lasta/src/main/resources/crawler.xml
<include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/> <!-- Crawler --> <component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" > </component> <!-- Crawler Thread -->
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Tue Nov 28 13:40:25 GMT 2017 - 1.7K bytes - Click Count (0) -
fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
file.delete(); file.mkdirs(); file.deleteOnExit(); fileTransformer.setPath(file.getAbsolutePath()); crawler.addUrl(url + "sitemaps.xml"); crawler.crawlerContext.setMaxAccessCount(maxCount); crawler.crawlerContext.setNumOfThread(numOfThread); crawler.urlFilter.addInclude(url + ".*");
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Jan 15 01:11:43 GMT 2026 - 13.1K bytes - Click Count (0)