Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 56 for sitemaps (0.09 seconds)

  1. fess-crawler-lasta/src/main/resources/crawler/sitemaps.xml

    Shinsuke Sugaya <******@****.***> 1444529815 +0900
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Sun Oct 11 02:16:55 GMT 2015
    - 365 bytes
    - Click Count (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        }
    
        /**
         * Adds sitemaps to the thread-local storage.
         * @param sitemaps An array of sitemap URLs.
         */
        public void addSitemaps(final String[] sitemaps) {
            sitemapsLocal.set(sitemaps);
        }
    
        /**
         * Removes sitemaps from the thread-local storage and returns them.
         * @return An array of sitemap URLs, or null if none were present.
         */
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Wed Dec 31 09:07:38 GMT 2025
    - 9K bytes
    - Click Count (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

        }
    
        /**
         * Adds sitemaps from robots.txt to the crawling queue.
         * @param urlQueue The URL queue to add sitemaps to.
         */
        protected void addSitemapsFromRobotsTxt(final UrlQueue<?> urlQueue) {
            final String[] sitemaps = crawlerContext.removeSitemaps();
            if (sitemaps != null) {
                for (final String childUrl : sitemaps) {
                    try {
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Wed Dec 31 09:07:38 GMT 2025
    - 20.3K bytes
    - Click Count (0)
  4. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                robotTxtFile.deleteOnExit();
    
                // sitemaps.xml
                buf = new StringBuilder();
                buf.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>").append('\n');
                buf.append("<urlset ").append("xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">").append('\n');
                buf.append("<url>").append('\n');
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Jan 15 01:11:43 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  5. CLAUDE.md

    ### Helpers
    
    - **RobotsTxtHelper**: RFC 9309 parsing, user-agent matching, crawl-delay, sitemaps
    - **SitemapsHelper**: Sitemap XML parsing, index handling
    - **MimeTypeHelper**: MIME detection via Tika
    - **EncodingHelper**: Charset detection with BOM
    - **UrlConvertHelper**: URL normalization
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Mar 12 03:39:20 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/Hc4HttpClient.java

                        if (robotsTxt != null) {
                            final String[] sitemaps = robotsTxt.getSitemaps();
                            if (sitemaps.length > 0) {
                                crawlerContext.addSitemaps(sitemaps);
                            }
    
                            final RobotsTxt.Directive directive = robotsTxt.getMatchedDirective(userAgent);
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Fri Jan 09 23:46:52 GMT 2026
    - 54.4K bytes
    - Click Count (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/Hc5HttpClient.java

                        if (robotsTxt != null) {
                            final String[] sitemaps = robotsTxt.getSitemaps();
                            if (sitemaps.length > 0) {
                                crawlerContext.addSitemaps(sitemaps);
                            }
    
                            final RobotsTxt.Directive directive = robotsTxt.getMatchedDirective(userAgent);
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Sat Jan 31 12:23:29 GMT 2026
    - 62.2K bytes
    - Click Count (0)
  8. fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml

        <include path="crawler/mimetype.xml"/>
        <include path="crawler/encoding.xml"/>
        <include path="crawler/urlconverter.xml"/>
        <include path="crawler/log.xml"/>
        <include path="crawler/sitemaps.xml"/>
    
        <include path="crawler/opensearch.xml"/>
    
    	<!-- Crawler -->
    	<component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" >
    	</component>
    
    	<!-- Crawler Thread -->
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Nov 07 04:44:10 GMT 2024
    - 2.2K bytes
    - Click Count (0)
  9. fess-crawler-lasta/src/main/resources/crawler.xml

    	<include path="crawler/extractor.xml"/>
    	<include path="crawler/mimetype.xml"/>
    	<include path="crawler/encoding.xml"/>
    	<include path="crawler/urlconverter.xml"/>
    	<include path="crawler/log.xml"/>
    	<include path="crawler/sitemaps.xml"/>
    
    	<!-- Crawler -->
    	<component name="crawler" class="org.codelibs.fess.crawler.Crawler" instance="prototype" >
    	</component>
    
    	<!-- Crawler Thread -->
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Tue Nov 28 13:40:25 GMT 2017
    - 1.7K bytes
    - Click Count (0)
  10. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                file.delete();
                file.mkdirs();
                file.deleteOnExit();
                fileTransformer.setPath(file.getAbsolutePath());
                crawler.addUrl(url + "sitemaps.xml");
                crawler.crawlerContext.setMaxAccessCount(maxCount);
                crawler.crawlerContext.setNumOfThread(numOfThread);
                crawler.urlFilter.addInclude(url + ".*");
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Jan 15 01:11:43 GMT 2026
    - 13.1K bytes
    - Click Count (0)
Back to Top