Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 7 of 7 for RobotsTxt (0.13 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

        private final List<String> sitemapList = new ArrayList<>();
    
        /**
         * Creates a new RobotsTxt instance.
         */
        public RobotsTxt() {
            // Default constructor
        }
    
        /**
         * Checks if access to a given path is allowed for a specific user agent according to robots.txt rules.
         *
         * @param path The path to check for access permission
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 18.5K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            assertTrue(robotsTxt.allows("/bbb", "Crawler"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler"));
            assertTrue(robotsTxt.allows("/aaa", "Crawler/1.0"));
            assertFalse(robotsTxt.allows("/bbb", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler/1.0"));
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/RobotsTxtTest.java

            Directive directive = new Directive("MyBot");
            robotsTxt.addDirective(directive);
    
            Directive retrieved = robotsTxt.getDirective("OtherBot");
            assertNull(retrieved);
        }
    
        public void test_getDirectiveWithNull() {
            // Test getDirective with null user agent
            RobotsTxt robotsTxt = new RobotsTxt();
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 13:29:22 UTC 2025
    - 14.4K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

            try {
                reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), charsetName));
    
                String line;
                final RobotsTxt robotsTxt = new RobotsTxt();
                final List<Directive> currentDirectiveList = new ArrayList<>();
                boolean isGroupRecordStarted = false;
    
                while ((line = reader.readLine()) != null) {
                    try {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                        }
                    }
    
                    if (httpEntity != null) {
                        final RobotsTxt robotsTxt = robotsTxtHelper.parse(httpEntity.getContent());
                        if (robotsTxt != null) {
                            final String[] sitemaps = robotsTxt.getSitemaps();
                            if (sitemaps.length > 0) {
                                crawlerContext.addSitemaps(sitemaps);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 53.7K bytes
    - Viewed (0)
  6. fess-crawler-lasta/src/main/resources/crawler/client.xml

    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    	<include path="crawler/robotstxt.xml" />
    	<include path="crawler/contentlength.xml" />
    	<include path="crawler/mimetype.xml" />
    
    	<component name="internalHttpClient" class="org.codelibs.fess.crawler.client.http.HcHttpClient"
    		instance="prototype">
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 3.3K bytes
    - Viewed (0)
  7. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_malformed.txt

    # Case 16: Multiple wildcards user agent
    User-agent: *
    Disallow: /default/
    
    # Case 17: Empty file handling (just comments and whitespace after this)
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 2.6K bytes
    - Viewed (0)
Back to top