Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 9 of 9 for magenta (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     *   <li>Manages crawl delay settings per user agent</li>
     *   <li>Stores sitemap URLs listed in robots.txt</li>
     * </ul>
     *
     * <p>The class uses case-insensitive pattern matching for user agents and supports
     * wildcard characters (*) in user agent strings. When multiple directives match a user agent,
     * the most specific (longest) match is used.</p>
     *
     */
    public class RobotsTxt {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

            this.authSchemeProviderMap = authSchemeProviderMap;
        }
    
        /**
         * Sets the user agent string.
         * @param userAgent The user agent string.
         */
        public void setUserAgent(final String userAgent) {
            this.userAgent = userAgent;
        }
    
        /**
         * Sets the credentials provider.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
        /** Pattern for parsing user-agent records. */
        protected static final Pattern USER_AGENT_RECORD =
                Pattern.compile("^user-agent:\\s*([^\\t\\n\\x0B\\f\\r]+)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing disallow records. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

         */
        public void setCookieSpec(final String cookieSpec) {
            this.cookieSpec = cookieSpec;
        }
    
        /**
         * Sets the user agent string.
         *
         * @param userAgent The user agent string
         */
        public void setUserAgent(final String userAgent) {
            this.userAgent = userAgent;
        }
    
        /**
         * Sets the proxy host name.
         *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  5. README.md

    ```bash
    # Format code (Eclipse formatter)
    mvn formatter:format
    
    # Check/apply license headers
    mvn license:check
    mvn license:format
    
    # Generate test coverage report
    mvn jacoco:prepare-agent test jacoco:report
    
    # Generate API documentation
    mvn javadoc:javadoc
    ```
    
    ### Testing
    
    The project uses JUnit 4 with embedded OpenSearch for integration testing:
    
    ```bash
    # Run all tests
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Sun Aug 31 03:31:14 UTC 2025
    - 12.1K bytes
    - Viewed (1)
  6. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                tempDir.delete();
                tempDir.mkdirs();
    
                // robots.txt
                StringBuilder buf = new StringBuilder();
                buf.append("User-agent: *").append('\n');
                buf.append("Disallow: /admin/").append('\n');
                buf.append("Disallow: /websvn/").append('\n');
                final File robotTxtFile = new File(tempDir, "robots.txt");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.3K bytes
    - Viewed (0)
  7. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                final File tempDir = File.createTempFile("crawlerDocRoot", "");
                tempDir.delete();
                tempDir.mkdirs();
    
                final StringBuilder buf = new StringBuilder();
                buf.append("User-agent: *").append('\n');
                buf.append("Disallow: /admin/").append('\n');
                buf.append("Disallow: /websvn/").append('\n');
                final File robotTxtFile = new File(tempDir, "robots.txt");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 5K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/util/CrawlerWebServer.java

                tempDir.delete();
                tempDir.mkdirs();
    
                // robots.txt
                StringBuilder buf = new StringBuilder();
                buf.append("User-agent: *").append('\n');
                buf.append("Disallow: /admin/").append('\n');
                buf.append("Disallow: /websvn/").append('\n');
                final File robotTxtFile = new File(tempDir, "robots.txt");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.3K bytes
    - Viewed (0)
  9. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

          <match minShouldMatch="2">
            <match value="user-agent:" type="stringignorecase" offset="0"/>
            <match value="allow:" type="stringignorecase" offset="0"/>
            <match value="disallow:" type="stringignorecase" offset="0"/>
            <match value="sitemap:" type="stringignorecase" offset="0"/>
            <match value="\nuser-agent:" type="stringignorecase" offset="0:1000"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
Back to top