Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 4 of 4 for getRobotsTxtUrlSet (0.07 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

            crawlerContext.setRobotsTxtUrlSet(newSet);
            assertSame(newSet, crawlerContext.getRobotsTxtUrlSet());
            assertEquals(1, crawlerContext.getRobotsTxtUrlSet().size());
    
            // Set null
            crawlerContext.setRobotsTxtUrlSet(null);
            assertNull(crawlerContext.getRobotsTxtUrlSet());
        }
    
        /**
         * Test LRU behavior of robotsTxtUrlSet
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 25.6K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java

                CrawlingParameterUtil.setCrawlerContext(crawlerContext);
                httpClient.init();
                httpClient.processRobotsTxt(url);
                assertEquals(1, crawlerContext.getRobotsTxtUrlSet().size());
                assertTrue(crawlerContext.getRobotsTxtUrlSet().contains("http://localhost:7070/robots.txt"));
                assertFalse(urlFilter.match("http://localhost:7070/admin/"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

            this.intervalController = intervalController;
        }
    
        /**
         * Gets the robots.txt URL set currently held by this context.
         * The stored reference is handed back directly; no copy is made.
         * @return The set of robots.txt URLs.
         */
        public Set<String> getRobotsTxtUrlSet() {
            return this.robotsTxtUrlSet;
        }
    
        /**
         * Sets the set of robots.txt URLs.
         * @param robotsTxtUrlSet The set of robots.txt URLs.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

            } else {
                hostUrl = url;
            }
            final String robotTxtUrl = hostUrl + "/robots.txt";
    
            // check url
            if (crawlerContext.getRobotsTxtUrlSet().contains(robotTxtUrl)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("{} is already visited.", robotTxtUrl);
                }
                return;
            }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
Back to top