Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 9 of 9 for Allow (0.01 sec)

  1. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt

    User-agent: FessCrawler
    Disallow:           # allows all 
    
    User-agent: BruteBot
    Disallow: /
    Allow: /foo/bar/
    Crawl-delay: 1314000
    
    # welcome!
    User-agent: Googlebot
    Crawl-delay: 1
    
    User-agent: *
    Disallow: /private/
    Disallow: /help        # disallows /help.html, /help/index.html, etc.
    Allow: /help/faq.html
    Crawl-delay: 3
    
    User-agent: Crawler
    Disallow: /aaa
    
    User-agent: Crawler/1.0
    Disallow: /bbb
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 566 bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

        protected static final Pattern DISALLOW_RECORD = Pattern.compile("^disallow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing allow records. */
        protected static final Pattern ALLOW_RECORD = Pattern.compile("^allow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing crawl-delay records. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/Sitemap.java

    package org.codelibs.fess.crawler.entity;
    
    import java.io.Serializable;
    
    /**
     * The Sitemap interface represents a sitemap entity with location and last modification date.
     * It extends the Serializable interface to allow sitemap objects to be serialized.
     */
    public interface Sitemap extends Serializable {
    
        /**
         * Retrieves the location (URL) of the sitemap.
         *
         * @return the location of the sitemap as a String.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.2K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClient.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    
    /**
     * Interface representing a client for a web crawler.
     * This client is responsible for executing requests and handling responses.
     * It extends {@link AutoCloseable} to allow for resource management.
     */
    public interface CrawlerClient extends AutoCloseable {
    
        /**
         * Sets the initialization parameters for the crawler client.
         *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/suggest/concurrent/Deferred.java

     *   to be executed when the computation is complete.
     *   The Promise class has then and error methods that allow you to register callbacks
     *   for successful and unsuccessful computations, respectively.
     * </p>
     *
     * <p>
     *   The Deferred class uses a CountDownLatch to allow you to wait for the computation to complete.
     *   The resolve and reject methods decrement the CountDownLatch, allowing the getResponse method
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Fri Jul 04 14:00:23 UTC 2025
    - 7.8K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     *
     * <p>Key features:</p>
     * <ul>
     *   <li>Supports multiple user-agent directives with pattern matching</li>
     *   <li>Handles Allow and Disallow rules for path-based access control</li>
     *   <li>Manages crawl delay settings per user agent</li>
     *   <li>Stores sitemap URLs listed in robots.txt</li>
     * </ul>
     *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  7. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

            final String clusterName = UUID.randomUUID().toString();
            runner.onBuild((number, settingsBuilder) -> {
                settingsBuilder.put("http.cors.enabled", true);
                settingsBuilder.put("http.cors.allow-origin", "*");
                settingsBuilder.put("discovery.type", "single-node");
            }).build(newConfigs().clusterName(clusterName).numOfNode(1));
    
            // wait for yellow status
            runner.ensureYellow();
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

            this.useRobotsTxtDisallows = useRobotsTxtDisallows;
        }
    
        /**
         * Sets whether to use robots.txt allow rules.
         *
         * @param useRobotsTxtAllows True to use allow rules, false otherwise
         */
        public void setUseRobotsTxtAllows(final boolean useRobotsTxtAllows) {
            this.useRobotsTxtAllows = useRobotsTxtAllows;
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  9. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        <!-- and a lower priority than pcap - TIKA-4174 -->
        <magic priority="55">
          <match minShouldMatch="2">
            <match value="user-agent:" type="stringignorecase" offset="0"/>
            <match value="allow:" type="stringignorecase" offset="0"/>
            <match value="disallow:" type="stringignorecase" offset="0"/>
            <match value="sitemap:" type="stringignorecase" offset="0"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
Back to top