- Sort Score
- Result 10 results
- Languages All
Results 11 - 20 of 45 for batters (0.07 sec)
-
fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml
</magic> <glob pattern="*.bin"/> <glob pattern="*.dms"/> <glob pattern="*.lha"/> <glob pattern="*.lrf"/> <glob pattern="*.lzh"/> <glob pattern="*.so"/> <glob pattern="*.dist"/> <glob pattern="*.distz"/> <glob pattern="*.pkg"/> <glob pattern="*.bpk"/> <glob pattern="*.dump"/> <glob pattern="*.elc"/> <glob pattern="*.deploy"/>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Mar 13 08:18:01 UTC 2025 - 320.1K bytes - Viewed (1) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
protected static final Pattern ALLOW_RECORD = Pattern.compile("^allow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE); /** Pattern for parsing crawl-delay records. */ protected static final Pattern CRAWL_DELAY_RECORD = Pattern.compile("^crawl-delay:\\s*([^\\s]+)\\s*$", Pattern.CASE_INSENSITIVE); /** * Pattern for Sitemap record. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 7.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java
/** * Pattern for XML tags. */ protected Pattern xmlTagPattern = Pattern.compile("<[^>]+>"); /** * Returns the encoding pattern. * @return The encoding pattern. */ @Override protected Pattern getEncodingPattern() { return xmlEncodingPattern; } /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
src/test/java/org/codelibs/opensearch/extension/analysis/PatternConcatenationFilterFactory.java
import org.opensearch.index.analysis.AbstractTokenFilterFactory; public class PatternConcatenationFilterFactory extends AbstractTokenFilterFactory { private Pattern pattern1; private Pattern pattern2; public PatternConcatenationFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name, final Settings settings) { super(indexSettings, name, settings);
Registered: Fri Sep 19 09:08:11 UTC 2025 - Last Modified: Sat Mar 15 06:51:20 UTC 2025 - 1.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java
* @return the parsed charset name, or null if not found */ protected String parseCharset(final String content) { final Pattern pattern = Pattern.compile("; *charset *= *([a-zA-Z0-9\\-_]+)", Pattern.CASE_INSENSITIVE); final Matcher matcher = pattern.matcher(content); if (matcher.find()) { return matcher.group(1); } return null; } /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 28.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java
* @param url URL * @return true if url is matched */ boolean match(String url); /** * Add an url pattern as a target. * * @param urlPattern Regular expression that is crawled */ void addInclude(String urlPattern); /** * Add an url pattern as a non-target. * * @param urlPattern Regular expression that is not crawled */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CharUtil.java
*/ private CharUtil() { } /** * Checks if the given character is a valid URL character. * * Valid URL characters include: * - Lowercase letters (a-z) * - Uppercase letters (A-Z) * - Digits (0-9) * - Special characters: . - * _ : / + % = & ? # [ ] @ ~ ! $ ' ( ) , ; * * @param c the character to check
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.3K bytes - Viewed (1) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/SitemapsRuleTest.java
sitemapsRule.addRule("url", Pattern.compile(".*sitemap.*")); assertTrue(sitemapsRule.match(responseData)); InputStream is = responseData.getResponseBody(); assertTrue(is instanceof InputStream); CloseableUtil.closeQuietly(responseData); } private void assertMatchFalse(ResponseData responseData) { sitemapsRule.addRule("url", Pattern.compile(".*sitemap.*"));
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 4.7K bytes - Viewed (0) -
README.md
crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second ``` ### URL Filtering ```java // Include patterns crawler.urlFilter.addInclude("https://example.com/.*"); crawler.urlFilter.addInclude(".*\\.pdf$"); // Exclude patterns crawler.urlFilter.addExclude(".*\\.js$"); crawler.urlFilter.addExclude(".*login.*"); ``` ## Supported Protocols and Formats
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
/** Pattern for extracting charset from meta tags. */ protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); /** * Pattern for HTML tags. */ protected Pattern htmlTagPattern = Pattern.compile("<[^>]+>"); /** Map of parser features. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 9.3K bytes - Viewed (0)