- Sort Score
- Result 10 results
- Languages All
Results 1 - 7 of 7 for RobotsTxt (0.09 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java
private final List<String> sitemapList = new ArrayList<>(); /** * Creates a new RobotsTxt instance. */ public RobotsTxt() { // Default constructor } /** * Checks if access to a given path is allowed for a specific user agent according to robots.txt rules. * * @param path The path to check for access permission
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 18.5K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java
assertTrue(robotsTxt.allows("/bbb", "Crawler")); assertTrue(robotsTxt.allows("/ccc", "Crawler")); assertTrue(robotsTxt.allows("/ddd", "Crawler")); assertTrue(robotsTxt.allows("/aaa", "Crawler/1.0")); assertFalse(robotsTxt.allows("/bbb", "Crawler/1.0")); assertTrue(robotsTxt.allows("/ccc", "Crawler/1.0")); assertTrue(robotsTxt.allows("/ddd", "Crawler/1.0"));
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 20.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/RobotsTxtTest.java
Directive directive = new Directive("MyBot"); robotsTxt.addDirective(directive); Directive retrieved = robotsTxt.getDirective("OtherBot"); assertNull(retrieved); } public void test_getDirectiveWithNull() { // Test getDirective with null user agent RobotsTxt robotsTxt = new RobotsTxt();
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Nov 13 13:29:22 UTC 2025 - 14.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
try { reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), charsetName)); String line; final RobotsTxt robotsTxt = new RobotsTxt(); final List<Directive> currentDirectiveList = new ArrayList<>(); boolean isGroupRecordStarted = false; while ((line = reader.readLine()) != null) { try {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 14 12:52:01 UTC 2025 - 11.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
} } if (httpEntity != null) { final RobotsTxt robotsTxt = robotsTxtHelper.parse(httpEntity.getContent()); if (robotsTxt != null) { final String[] sitemaps = robotsTxt.getSitemaps(); if (sitemaps.length > 0) { crawlerContext.addSitemaps(sitemaps);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 53.7K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/client.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <include path="crawler/robotstxt.xml" /> <include path="crawler/contentlength.xml" /> <include path="crawler/mimetype.xml" /> <component name="internalHttpClient" class="org.codelibs.fess.crawler.client.http.HcHttpClient" instance="prototype">
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 3.3K bytes - Viewed (0) -
fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_malformed.txt
# Case 16: Multiple wildcards user agent User-agent: * Disallow: /default/ # Case 17: Empty file handling (just comments and whitespace after this)
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 14 12:52:01 UTC 2025 - 2.6K bytes - Viewed (0)