Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 8 of 8 for disallowed (0.04 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

             */
            public String[] getAllows() {
                return allowedPaths.toArray(new String[allowedPaths.size()]);
            }
    
            /**
             * Gets all disallowed paths for this directive.
             * @return an array of disallowed paths
             */
            public String[] getDisallows() {
                return disallowedPaths.toArray(new String[disallowedPaths.size()]);
            }
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  2. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt

    User-agent: FessCrawler
    Disallow:           # allows all 
    
    User-agent: BruteBot
    Disallow: /
    Allow: /foo/bar/
    Crawl-delay: 1314000
    
    # welcome!
    User-agent: Googlebot
    Crawl-delay: 1
    
    User-agent: *
    Disallow: /private/
    Disallow: /help        # disallows /help.html, /help/index.html, etc.
    Allow: /help/faq.html
    Crawl-delay: 3
    
    User-agent: Crawler
    Disallow: /aaa
    
    User-agent: Crawler/1.0
    Disallow: /bbb
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 566 bytes
    - Viewed (0)
  3. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                tempDir.mkdirs();
    
                final StringBuilder buf = new StringBuilder();
                buf.append("User-agent: *").append('\n');
                buf.append("Disallow: /admin/").append('\n');
                buf.append("Disallow: /websvn/").append('\n');
                final File robotTxtFile = new File(tempDir, "robots.txt");
                FileUtil.writeBytes(robotTxtFile.getAbsolutePath(), buf.toString().getBytes("UTF-8"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 5K bytes
    - Viewed (0)
  4. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                tempDir.mkdirs();
    
                // robots.txt
                StringBuilder buf = new StringBuilder();
                buf.append("User-agent: *").append('\n');
                buf.append("Disallow: /admin/").append('\n');
                buf.append("Disallow: /websvn/").append('\n');
                final File robotTxtFile = new File(tempDir, "robots.txt");
                FileUtil.writeBytes(robotTxtFile.getAbsolutePath(), buf.toString().getBytes("UTF-8"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.3K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                Pattern.compile("^user-agent:\\s*([^\\t\\n\\x0B\\f\\r]+)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing disallow records. */
        protected static final Pattern DISALLOW_RECORD = Pattern.compile("^disallow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing allow records. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                tempDir.mkdirs();
    
                // robots.txt
                StringBuilder buf = new StringBuilder();
                buf.append("User-agent: *").append('\n');
                buf.append("Disallow: /admin/").append('\n');
                buf.append("Disallow: /websvn/").append('\n');
                final File robotTxtFile = new File(tempDir, "robots.txt");
                FileUtil.writeBytes(robotTxtFile.getAbsolutePath(), buf.toString().getBytes("UTF-8"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

            this.redirectHttpStatusPattern = redirectHttpStatusPattern;
        }
    
        /**
         * Sets whether to use robots.txt disallow rules.
         *
         * @param useRobotsTxtDisallows True to use disallow rules, false otherwise
         */
        public void setUseRobotsTxtDisallows(final boolean useRobotsTxtDisallows) {
            this.useRobotsTxtDisallows = useRobotsTxtDisallows;
        }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  8. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        <magic priority="55">
          <match minShouldMatch="2">
            <match value="user-agent:" type="stringignorecase" offset="0"/>
            <match value="allow:" type="stringignorecase" offset="0"/>
            <match value="disallow:" type="stringignorecase" offset="0"/>
            <match value="sitemap:" type="stringignorecase" offset="0"/>
            <match value="\nuser-agent:" type="stringignorecase" offset="0:1000"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
Back to top