Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 2 of 2 for MyBot (0.12 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/RobotsTxtTest.java

            assertFalse(robotsTxt.allows("/admin/secret.html", "MyBot"));
            assertTrue(robotsTxt.allows("/public/", "MyBot")); // Not disallowed
        }
    
        public void test_allowsWithAllowOverridingDisallow() {
            // Test that allow takes precedence over disallow
            RobotsTxt robotsTxt = new RobotsTxt();
    
            Directive directive = new Directive("MyBot");
            directive.addDisallow("/admin/");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 13:29:22 UTC 2025
    - 14.4K bytes
    - Viewed (0)
  2. README.md

    StandardCrawlerContainer container = new StandardCrawlerContainer();
    
    // Configure HTTP client with custom settings
    container.singleton("httpClient", HcHttpClient.class, client -> {
        client.setUserAgent("MyBot/1.0");
        client.setConnectionTimeout(30000);
        client.setMaxConnections(100);
    });
    
    // Configure URL filtering
    container.singleton("urlFilter", UrlFilterImpl.class, filter -> {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
Back to top