- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 48 for roots (0.57 sec)
-
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
/** XPath expression for extracting robots content from meta tags */ private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content"; /** Robots tag value indicating no indexing or following */ private static final String ROBOTS_TAG_NONE = "none"; /** Robots tag value indicating no indexing */
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Dec 12 13:58:40 UTC 2025 - 54.6K bytes - Viewed (0) -
CLAUDE.md
- **Testing**: JUnit 4, UTFlute, Mockito 5.7.0 - **Storage**: In-memory (default), OpenSearch (optional) ### Protocols - **HTTP/HTTPS**: Full crawling, cookies, auth, robots.txt - **File**: Local/network file systems - **FTP**: With authentication - **SMB/CIFS**: Windows shares (SMB1/SMB2+) - **Storage**: MinIO/S3-compatible ### Content Formats
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 28 17:31:34 UTC 2025 - 10.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java
robotsTxtHelper = container.getComponent("robotsTxtHelper"); } public void testParse() { RobotsTxt robotsTxt; final InputStream in = RobotsTxtHelperTest.class.getResourceAsStream("robots.txt"); try { robotsTxt = robotsTxtHelper.parse(in); } finally { CloseableUtil.closeQuietly(in); }
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 20.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java
import java.util.Map; import java.util.regex.Pattern; import org.codelibs.core.lang.StringUtil; /** * Represents a robots.txt file parser and handler. * This class manages the rules defined in a robots.txt file, including user agent directives, * allowed/disallowed paths, crawl delays, and sitemap URLs. * * <p>The robots.txt protocol is implemented according to the standard specification,
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 18.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
* </ul> * * <p>References:</p> * <ul> * <li><a href="https://datatracker.ietf.org/doc/html/rfc9309">RFC 9309 - Robots Exclusion Protocol</a></li> * <li><a href="https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt"> * Google's robots.txt Specification</a></li> * </ul> * * @author bowez * @author shinsuke * */ public class RobotsTxtHelper {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 14 12:52:01 UTC 2025 - 11.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
httpClientPropertyMap.put(name, value); } } /** * Processes robots.txt for the given URL. * This method fetches and parses the robots.txt file to extract disallow/allow rules * and sitemap information. * * @param url The URL to process robots.txt for */ protected void processRobotsTxt(final String url) { if (StringUtil.isBlank(url)) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 53.7K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/script/ScriptEngineFactory.java
} if (logger.isDebugEnabled()) { logger.debug("Loaded ScriptEngine: {}", name); } scriptEngineMap.put(name.toLowerCase(Locale.ROOT), scriptEngine); scriptEngineMap.put(scriptEngine.getClass().getSimpleName().toLowerCase(Locale.ROOT), scriptEngine); } /** * Gets a script engine. * @param name The name of the script engine. * @return The script engine. */
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Nov 28 16:29:12 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_wildcard.txt
# Test robots.txt for wildcard (*) and end-of-path ($) support # Based on RFC 9309 specification # Test wildcard patterns User-agent: WildcardBot Disallow: /*.pdf$ Disallow: /admin/*.php Disallow: /*/private/ Allow: /public/*.html # Test end-of-path ($) patterns User-agent: EndPathBot Disallow: /fish$ Disallow: /temp$ Allow: /fishing # Test complex patterns User-agent: ComplexBot Disallow: / Allow: /$
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Nov 13 14:03:41 UTC 2025 - 910 bytes - Viewed (0) -
src/main/java/org/codelibs/fess/mylasta/direction/FessEnv.java
/** The key of the configuration. e.g. [Test] */ String MAIL_SUBJECT_TEST_PREFIX = "mail.subject.test.prefix"; /** The key of the configuration. e.g. root@localhost */ String MAIL_RETURN_PATH = "mail.return.path"; /** * Get the value of property as {@link String}. * @param propertyKey The key of the property. (NotNull)
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Nov 27 07:01:25 UTC 2025 - 9.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/taglib/FessFunctions.java
* Retrieves the current user's locale from the request manager. * * @return the user's locale, or Locale.ROOT if not available */ private static Locale getUserLocale() { final Locale locale = ComponentUtil.getRequestManager().getUserLocale(); if (locale == null) { return Locale.ROOT; } return locale; } /**
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Nov 28 16:29:12 UTC 2025 - 25.4K bytes - Viewed (1)