- Sort Score
- Results per page: 10
- Languages All
Results 1 - 2 of 2 for processRobotsTxt (0.29 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java
urlFilter.init(sessionId); crawlerContext.setUrlFilter(urlFilter); CrawlingParameterUtil.setCrawlerContext(crawlerContext); httpClient.init(); httpClient.processRobotsTxt(url); assertEquals(1, crawlerContext.getRobotsTxtUrlSet().size()); assertTrue(crawlerContext.getRobotsTxtUrlSet().contains("http://localhost:7070/robots.txt"));
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 11.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
* This method fetches and parses the robots.txt file to extract disallow/allow rules * and sitemap information. * * @param url The URL to process robots.txt for */ protected void processRobotsTxt(final String url) { if (StringUtil.isBlank(url)) { throw new CrawlerSystemException("url is null or empty."); } if (robotsTxtHelper == null || !robotsTxtHelper.isEnabled()) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 52.2K bytes - Viewed (0)