Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 42 for aient (1.23 sec)

  1. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_wildcard.txt

    User-agent: PriorityBot
    Disallow: /store
    Allow: /store/public
    Disallow: /store/public/sale
    
    # Test Allow vs Disallow with same length (Allow wins)
    User-agent: SameLengthBot
    Disallow: /page
    Allow: /page
    
    # Test multiple wildcards
    User-agent: MultiWildcardBot
    Disallow: /*.cgi*
    Disallow: /*?*id=*
    
    # Test literal $ in middle of pattern
    User-agent: DollarBot
    Disallow: /price$info
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 14:03:41 UTC 2025
    - 910 bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     *   <li>Manages crawl delay settings per user agent</li>
     *   <li>Stores sitemap URLs listed in robots.txt</li>
     * </ul>
     *
     * <p>The class uses case-insensitive pattern matching for user agents and supports
     * wildcard characters (*) in user agent strings. When multiple directives match a user agent,
     * the most specific (longest) match is used.</p>
     *
     */
    public class RobotsTxt {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 18.5K bytes
    - Viewed (0)
  3. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_malformed.txt

    NoValue:
    :::
       :
    
    # Case 13: Numeric crawl-delay edge cases
    User-agent: NumericBot
    Crawl-delay: 0
    Crawl-delay: 999999999
    Crawl-delay: 1.23e10
    
    # Case 14: Tab characters instead of spaces
    User-agent:	TabBot
    Disallow:	/tab1/
    Allow:	/tab2/
    
    # Case 15: Unicode and special characters in user-agent
    User-agent: Bot™
    Disallow: /trademark/
    
    User-agent: Bot®
    Disallow: /registered/
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/helper/UserAgentHelper.java

        }
    
        /** The HTTP header name for User-Agent */
        protected static final String USER_AGENT = "user-agent";
    
        /** The request attribute key for storing cached user agent type */
        protected static final String USER_AGENT_TYPE = "ViewHelper.UserAgent";
    
        /**
         * Determines the user agent type from the current HTTP request.
         * The method analyzes the User-Agent header to categorize the browser type
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 3.5K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            RobotsTxt robotsTxt;
            final InputStream in = new java.io.ByteArrayInputStream(robotsTxtContent.getBytes());
            try {
                robotsTxt = robotsTxtHelper.parse(in);
            } finally {
                CloseableUtil.closeQuietly(in);
            }
    
            // Directives before User-agent should be ignored
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
        /** Pattern for parsing user-agent records. */
        protected static final Pattern USER_AGENT_RECORD =
                Pattern.compile("^user-agent:\\s*([^\\t\\n\\x0B\\f\\r]+)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing disallow records. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

            this.authSchemeProviderMap = authSchemeProviderMap;
        }
    
        /**
         * Sets the user agent string.
         * @param userAgent The user agent string.
         */
        public void setUserAgent(final String userAgent) {
            this.userAgent = userAgent;
        }
    
        /**
         * Sets the credentials provider.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/helper/PathMappingHelper.java

                logger.debug("replace: {} -> {}", url, newUrl);
            }
            return newUrl;
        }
    
        /**
         * Checks if the user agent matches the path mapping.
         *
         * @param pathMapping the path mapping
         * @return true if the user agent matches
         */
        protected boolean matchUserAgent(final PathMapping pathMapping) {
            if (!pathMapping.hasUAMathcer()) {
                return true;
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 9.5K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/Constants.java

        // ============================================================
        // User Agent Configuration
        // ============================================================
    
        /** Prefix for Fess crawler user agent string. */
        public static final String CRAWLING_USER_AGENT_PREFIX = "Mozilla/5.0 (compatible; Fess/";
    
        /** Suffix for Fess crawler user agent string with bot information URL. */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 35.2K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

         */
        public void setCookieSpec(final String cookieSpec) {
            this.cookieSpec = cookieSpec;
        }
    
        /**
         * Sets the user agent string.
         *
         * @param userAgent The user agent string
         */
        public void setUserAgent(final String userAgent) {
            this.userAgent = userAgent;
        }
    
        /**
         * Sets the proxy host name.
         *
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 53.7K bytes
    - Viewed (0)
Back to top