Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 11 - 20 of 45 for batters (0.07 sec)

  1. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        </magic>
        <glob pattern="*.bin"/>
        <glob pattern="*.dms"/>
        <glob pattern="*.lha"/>
        <glob pattern="*.lrf"/>
        <glob pattern="*.lzh"/>
        <glob pattern="*.so"/>
        <glob pattern="*.dist"/>
        <glob pattern="*.distz"/>
        <glob pattern="*.pkg"/>
        <glob pattern="*.bpk"/>
        <glob pattern="*.dump"/>
        <glob pattern="*.elc"/>
        <glob pattern="*.deploy"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

        protected static final Pattern ALLOW_RECORD = Pattern.compile("^allow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /**
         * Pattern for parsing crawl-delay records.
         * Matches an input of the form {@code crawl-delay: value}, ignoring case and
         * allowing optional whitespace around the value. Group 1 captures the value,
         * which must consist of at least one non-whitespace character.
         */
        protected static final Pattern CRAWL_DELAY_RECORD = Pattern.compile("^crawl-delay:\\s*([^\\s]+)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /**
         * Pattern for Sitemap record.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

        /**
         * Pattern for XML tags.
         * Matches a {@code <}, followed by one or more characters other than {@code >},
         * followed by a closing {@code >}.
         */
        protected Pattern xmlTagPattern = Pattern.compile("<[^>]+>");
    
        /**
         * Returns the encoding pattern.
         * This override supplies the pattern stored in {@code xmlEncodingPattern}.
         *
         * @return The encoding pattern, i.e. the current value of {@code xmlEncodingPattern}.
         */
        @Override
        protected Pattern getEncodingPattern() {
            return xmlEncodingPattern;
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  4. src/test/java/org/codelibs/opensearch/extension/analysis/PatternConcatenationFilterFactory.java

    import org.opensearch.index.analysis.AbstractTokenFilterFactory;
    
    public class PatternConcatenationFilterFactory extends AbstractTokenFilterFactory {
    
        private Pattern pattern1;
    
        private Pattern pattern2;
    
        public PatternConcatenationFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name,
                final Settings settings) {
            super(indexSettings, name, settings);
    
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Sat Mar 15 06:51:20 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

         * @return the parsed charset name, or null if not found
         */
        protected String parseCharset(final String content) {
            // A null input cannot contain a charset declaration; report "not found" instead of
            // letting Pattern#matcher throw a NullPointerException.
            if (content == null) {
                return null;
            }
            // Looks for "; charset = <name>" (spaces optional, case-insensitive) anywhere in the text.
            // NOTE(review): the pattern is compiled on every call; hoisting it into a constant would avoid that.
            final Matcher charsetMatcher =
                    Pattern.compile("; *charset *= *([a-zA-Z0-9\\-_]+)", Pattern.CASE_INSENSITIVE).matcher(content);
            // Group 1 holds the charset name: letters, digits, '-' and '_'.
            return charsetMatcher.find() ? charsetMatcher.group(1) : null;
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java

         * @param url URL
         * @return true if url is matched
         */
        boolean match(String url);
    
        /**
         * Adds a URL pattern as a crawl target.
         *
         * @param urlPattern Regular expression describing URLs that are crawled
         */
        void addInclude(String urlPattern);
    
        /**
         * Add an url pattern as a non-target.
         *
         * @param urlPattern Regular expression that is not crawled
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.6K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CharUtil.java

         */
        private CharUtil() {
            // Intentionally empty: the constructor is private, so code outside this class cannot create instances.
        }
    
        /**
         * Checks if the given character is a valid URL character.
         *
         * Valid URL characters include:
         * - Lowercase letters (a-z)
         * - Uppercase letters (A-Z)
         * - Digits (0-9)
         * - Special characters: . - * _ : / + % = &amp; ? # [ ] @ ~ ! $ ' ( ) , ;
         *
         * @param c the character to check
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.3K bytes
    - Viewed (1)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/SitemapsRuleTest.java

            sitemapsRule.addRule("url", Pattern.compile(".*sitemap.*"));
            assertTrue(sitemapsRule.match(responseData));
            InputStream is = responseData.getResponseBody();
            assertTrue(is instanceof InputStream);
            CloseableUtil.closeQuietly(responseData);
        }
    
        private void assertMatchFalse(ResponseData responseData) {
            sitemapsRule.addRule("url", Pattern.compile(".*sitemap.*"));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 4.7K bytes
    - Viewed (0)
  9. README.md

    crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second
    ```
    
    ### URL Filtering
    
    ```java
    // Include patterns
    crawler.urlFilter.addInclude("https://example.com/.*");
    crawler.urlFilter.addInclude(".*\\.pdf$");
    
    // Exclude patterns  
    crawler.urlFilter.addExclude(".*\\.js$");
    crawler.urlFilter.addExclude(".*login.*");
    ```
    
    ## Supported Protocols and Formats
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

        /**
         * Pattern for extracting charset from meta tags.
         * Matches a {@code <meta ...>} tag whose quoted {@code content} attribute contains
         * {@code ; charset=NAME}, ignoring case. Group 1 captures the charset name; because the
         * group uses {@code *}, it may be an empty string.
         */
        protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
                Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    
        /**
         * Pattern for HTML tags.
         * Matches a {@code <}, followed by one or more characters other than {@code >},
         * followed by a closing {@code >}.
         */
        protected Pattern htmlTagPattern = Pattern.compile("<[^>]+>");
    
        /** Map of parser features. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
Back to top