Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 4 of 4 for RobotsTxt (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

        private final List<String> sitemapList = new ArrayList<>();
    
        /**
         * Creates a new RobotsTxt instance.
         */
        public RobotsTxt() {
            // Default constructor
        }
    
        /**
         * Checks if access to a given path is allowed for a specific user agent according to robots.txt rules.
         *
         * @param path The path to check for access permission
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            assertTrue(robotsTxt.allows("/bbb", "Crawler"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler"));
            assertTrue(robotsTxt.allows("/aaa", "Crawler/1.0"));
            assertFalse(robotsTxt.allows("/bbb", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler/1.0"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 5.9K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                String line;
                final RobotsTxt robotsTxt = new RobotsTxt();
                final List<Directive> currentDirectiveList = new ArrayList<>();
                boolean isGroupRecordStarted = false;
                while ((line = reader.readLine()) != null) {
                    line = stripComment(line).trim();
                    if (StringUtil.isEmpty(line)) {
                        continue;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                        }
                    }
    
                    if (httpEntity != null) {
                        final RobotsTxt robotsTxt = robotsTxtHelper.parse(httpEntity.getContent());
                        if (robotsTxt != null) {
                            final String[] sitemaps = robotsTxt.getSitemaps();
                            if (sitemaps.length > 0) {
                                crawlerContext.addSitemaps(sitemaps);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
Back to top