Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 24 for aient (0.02 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     *   <li>Manages crawl delay settings per user agent</li>
     *   <li>Stores sitemap URLs listed in robots.txt</li>
     * </ul>
     *
     * <p>The class uses case-insensitive pattern matching for user agents and supports
     * wildcard characters (*) in user agent strings. When multiple directives match a user agent,
     * the most specific (longest) match is used.</p>
     *
     */
    public class RobotsTxt {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 18.5K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            RobotsTxt robotsTxt;
            final InputStream in = new java.io.ByteArrayInputStream(robotsTxtContent.getBytes());
            try {
                robotsTxt = robotsTxtHelper.parse(in);
            } finally {
                CloseableUtil.closeQuietly(in);
            }
    
            // Directives before User-agent should be ignored
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
        /** Pattern for parsing user-agent records. */
        protected static final Pattern USER_AGENT_RECORD =
                Pattern.compile("^user-agent:\\s*([^\\t\\n\\x0B\\f\\r]+)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing disallow records. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

            this.authSchemeProviderMap = authSchemeProviderMap;
        }
    
        /**
         * Sets the user agent string.
         * @param userAgent The user agent string.
         */
        public void setUserAgent(final String userAgent) {
            this.userAgent = userAgent;
        }
    
        /**
         * Sets the credentials provider.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/Constants.java

        // ============================================================
        // User Agent Configuration
        // ============================================================
    
        /** Prefix for Fess crawler user agent string. */
        public static final String CRAWLING_USER_AGENT_PREFIX = "Mozilla/5.0 (compatible; Fess/";
    
        /** Suffix for Fess crawler user agent string with bot information URL. */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 35.2K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/RobotsTxtTest.java

            Directive retrieved = robotsTxt.getDirective("OtherBot");
            assertNull(retrieved);
        }
    
        public void test_getDirectiveWithNull() {
            // Test getDirective with null user agent
            RobotsTxt robotsTxt = new RobotsTxt();
    
            Directive retrieved = robotsTxt.getDirective(null);
            assertNull(retrieved);
        }
    
        public void test_getMatchedDirectiveWithExactMatch() {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 13:29:22 UTC 2025
    - 14.4K bytes
    - Viewed (0)
  7. README.md

    ```bash
    # Format code (Eclipse formatter)
    mvn formatter:format
    
    # Check/apply license headers
    mvn license:check
    mvn license:format
    
    # Generate test coverage report
    mvn jacoco:prepare-agent test jacoco:report
    
    # Generate API documentation
    mvn javadoc:javadoc
    ```
    
    ### Testing
    
    The project uses JUnit 4 with embedded OpenSearch for integration testing:
    
    ```bash
    # Run all tests
    Registered: Sat Dec 20 13:04:59 UTC 2025
    - Last Modified: Sun Aug 31 03:31:14 UTC 2025
    - 12.1K bytes
    - Viewed (1)
  8. pom.xml

    			</plugin>
    			<plugin>
    				<groupId>org.jacoco</groupId>
    				<artifactId>jacoco-maven-plugin</artifactId>
    				<version>0.8.13</version>
    				<executions>
    					<execution>
    						<goals>
    							<goal>prepare-agent</goal>
    						</goals>
    					</execution>
    					<execution>
    						<id>report</id>
    						<phase>prepare-package</phase>
    						<goals>
    							<goal>report</goal>
    						</goals>
    					</execution>
    Registered: Sat Dec 20 13:44:44 UTC 2025
    - Last Modified: Mon Aug 25 14:34:10 UTC 2025
    - 12.1K bytes
    - Viewed (0)
  9. CLAUDE.md

    extractorFactory.addExtractor("text/html", htmlExtractor, 2);  // Weight 2
    extractorFactory.addExtractor("text/html", tikaExtractor, 1);  // Fallback
    ```
    
    ### Helpers
    
    **RobotsTxtHelper**: RFC 9309 parsing, user-agent matching, crawl-delay, sitemaps
    **SitemapsHelper**: Sitemap XML parsing, index handling
    **MimeTypeHelper**: MIME detection via Tika
    **EncodingHelper**: Charset detection with BOM
    **UrlConvertHelper**: URL normalization
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/util/ComponentUtil.java

         */
        public static WebApiManagerFactory getWebApiManagerFactory() {
            return getComponent(WEB_API_MANAGER_FACTORY);
        }
    
        /**
         * Gets the user agent helper component.
         * @return The user agent helper.
         */
        public static UserAgentHelper getUserAgentHelper() {
            return getComponent(USER_AGENT_HELPER);
        }
    
        /**
         * Gets the data store factory component.
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 28.9K bytes
    - Viewed (0)
Back to top