- Sort Score
- Result 10 results
- Languages All
Results 1 - 6 of 6 for RobotsTxt (0.11 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java
import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.codelibs.core.lang.StringUtil; public class RobotsTxt { private static final String ALL_BOTS = "*"; protected final Map<Pattern, Directive> directiveMap = new LinkedHashMap<>(); private final List<String> sitemapList = new ArrayList<>();
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 4.8K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/robotstxt.xml
Shinsuke Sugaya <******@****.***> 1444529815 +0900
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 367 bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java
assertTrue(robotsTxt.allows("/bbb", "Crawler")); assertTrue(robotsTxt.allows("/ccc", "Crawler")); assertTrue(robotsTxt.allows("/ddd", "Crawler")); assertTrue(robotsTxt.allows("/aaa", "Crawler/1.0")); assertFalse(robotsTxt.allows("/bbb", "Crawler/1.0")); assertTrue(robotsTxt.allows("/ccc", "Crawler/1.0")); assertTrue(robotsTxt.allows("/ddd", "Crawler/1.0"));
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 5.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
final BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), charsetName)); String line; final RobotsTxt robotsTxt = new RobotsTxt(); final List<Directive> currentDirectiveList = new ArrayList<>(); boolean isGroupRecodeStarted = false; while ((line = reader.readLine()) != null) { line = stripComment(line).trim();
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sat Oct 12 01:40:57 UTC 2024 - 6.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
} } if (httpEntity != null) { final RobotsTxt robotsTxt = robotsTxtHelper.parse(httpEntity.getContent()); if (robotsTxt != null) { final String[] sitemaps = robotsTxt.getSitemaps(); if (sitemaps.length > 0) { crawlerContext.addSitemaps(sitemaps);
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu May 09 09:29:26 UTC 2024 - 41K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/client.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <include path="crawler/robotstxt.xml" /> <include path="crawler/contentlength.xml" /> <include path="crawler/mimetype.xml" /> <component name="internalHttpClient" class="org.codelibs.fess.crawler.client.http.HcHttpClient" instance="prototype">
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Tue Aug 08 12:54:47 UTC 2023 - 2.8K bytes - Viewed (0)