Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 48 for roots (0.57 sec)

  1. src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

        /** XPath expression for extracting robots content from meta tags */
        private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content";
    
        /** Robots tag value indicating no indexing or following */
        private static final String ROBOTS_TAG_NONE = "none";
    
        /** Robots tag value indicating no indexing */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 54.6K bytes
    - Viewed (0)
  2. CLAUDE.md

    - **Testing**: JUnit 4, UTFlute, Mockito 5.7.0
    - **Storage**: In-memory (default), OpenSearch (optional)
    
    ### Protocols
    
    - **HTTP/HTTPS**: Full crawling, cookies, auth, robots.txt
    - **File**: Local/network file systems
    - **FTP**: With authentication
    - **SMB/CIFS**: Windows shares (SMB1/SMB2+)
    - **Storage**: MinIO/S3-compatible
    
    ### Content Formats
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            robotsTxtHelper = container.getComponent("robotsTxtHelper");
        }
    
        public void testParse() {
            RobotsTxt robotsTxt;
            final InputStream in = RobotsTxtHelperTest.class.getResourceAsStream("robots.txt");
            try {
                robotsTxt = robotsTxtHelper.parse(in);
            } finally {
                CloseableUtil.closeQuietly(in);
            }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.codelibs.core.lang.StringUtil;
    
    /**
     * Represents a robots.txt file parser and handler.
     * This class manages the rules defined in a robots.txt file, including user agent directives,
     * allowed/disallowed paths, crawl delays, and sitemap URLs.
     *
     * <p>The robots.txt protocol is implemented according to the standard specification,
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 18.5K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </ul>
     *
     * <p>References:</p>
     * <ul>
     * <li><a href="https://datatracker.ietf.org/doc/html/rfc9309">RFC 9309 - Robots Exclusion Protocol</a></li>
     * <li><a href="https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt">
     * Google's robots.txt Specification</a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                httpClientPropertyMap.put(name, value);
            }
        }
    
        /**
         * Processes robots.txt for the given URL.
         * This method fetches and parses the robots.txt file to extract disallow/allow rules
         * and sitemap information.
         *
         * @param url The URL to process robots.txt for
         */
        protected void processRobotsTxt(final String url) {
            if (StringUtil.isBlank(url)) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 53.7K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/script/ScriptEngineFactory.java

            }
            if (logger.isDebugEnabled()) {
                logger.debug("Loaded ScriptEngine: {}", name);
            }
            scriptEngineMap.put(name.toLowerCase(Locale.ROOT), scriptEngine);
            scriptEngineMap.put(scriptEngine.getClass().getSimpleName().toLowerCase(Locale.ROOT), scriptEngine);
        }
    
        /**
         * Gets a script engine.
         * @param name The name of the script engine.
         * @return The script engine.
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  8. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_wildcard.txt

    # Test robots.txt for wildcard (*) and end-of-path ($) support
    # Based on RFC 9309 specification
    
    # Test wildcard patterns
    User-agent: WildcardBot
    Disallow: /*.pdf$
    Disallow: /admin/*.php
    Disallow: /*/private/
    Allow: /public/*.html
    
    # Test end-of-path ($) patterns
    User-agent: EndPathBot
    Disallow: /fish$
    Disallow: /temp$
    Allow: /fishing
    
    # Test complex patterns
    User-agent: ComplexBot
    Disallow: /
    Allow: /$
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 14:03:41 UTC 2025
    - 910 bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/mylasta/direction/FessEnv.java

        /** The key of the configuration. e.g. [Test] */
        String MAIL_SUBJECT_TEST_PREFIX = "mail.subject.test.prefix";
    
        /** The key of the configuration. e.g. root@localhost */
        String MAIL_RETURN_PATH = "mail.return.path";
    
        /**
         * Get the value of property as {@link String}.
         * @param propertyKey The key of the property. (NotNull)
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Nov 27 07:01:25 UTC 2025
    - 9.9K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/taglib/FessFunctions.java

         * Retrieves the current user's locale from the request manager.
         *
         * @return the user's locale, or Locale.ROOT if not available
         */
        private static Locale getUserLocale() {
            final Locale locale = ComponentUtil.getRequestManager().getUserLocale();
            if (locale == null) {
                return Locale.ROOT;
            }
            return locale;
        }
    
        /**
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 25.4K bytes
    - Viewed (1)
Back to top