Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 12 for m_regex (0.03 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            assertFalse(urlFilter.match("https://other.com/page.html"));
        }
    
        /**
         * Test adding invalid regex include pattern
         */
        public void test_addInclude_invalidRegex() {
            String sessionId = "test-session-004";
            urlFilter.init(sessionId);
    
            // Invalid regex pattern should be handled gracefully
            urlFilter.addInclude(".*[invalid");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/form/FormScheme.java

    import java.io.UnsupportedEncodingException;
    import java.net.URLEncoder;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.function.BiConsumer;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.stream.Collectors;
    
    import org.apache.http.Header;
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpRequest;
    import org.apache.http.HttpResponse;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14.3K bytes
    - Viewed (1)
  3. README.md

    - **Extensible Architecture**: Plugin system for custom extractors, transformers, and clients
    - **Rate Limiting**: Politeness policies and interval controllers
    - **URL Filtering**: Regex-based inclusion/exclusion patterns
    - **Data Persistence**: Multiple backend options including OpenSearch integration
    
    ## Technology Stack
    
    - **Java**: 21+ (requires Java 21 or higher)
    - **Build System**: Maven 3.x
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import java.io.InputStream;
    import java.io.UnsupportedEncodingException;
    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import javax.xml.xpath.XPathEvaluationResult;
    import javax.xml.xpath.XPathException;
    import javax.xml.xpath.XPathNodes;
    
    import org.apache.logging.log4j.LogManager;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

    import java.nio.charset.Charset;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.stream.Stream;
    
    import javax.xml.xpath.XPathException;
    import javax.xml.xpath.XPathNodes;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     */
    package org.codelibs.fess.crawler.entity;
    
    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.codelibs.core.lang.StringUtil;
    
    /**
     * Represents a robots.txt file parser and handler.
     * This class manages the rules defined in a robots.txt file, including user agent directives,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

    import java.util.Iterator;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.TimeUnit;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import javax.xml.XMLConstants;
    import javax.xml.namespace.NamespaceContext;
    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import java.io.BufferedInputStream;
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.TimeUnit;
    import java.util.regex.Pattern;
    
    import javax.xml.xpath.XPathNodes;
    
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java

            ConfigurableRule rule = new ConfigurableRule();
            rule.setRuleId("specialRule");
            rule.setResponseProcessor(new TestResponseProcessor("specialProcessor"));
    
            // Test with regex special characters
            rule.addCondition("url", "https://example\\.com/\\?param=.*");
    
            ResponseData responseData1 = new ResponseData();
            responseData1.setUrl("https://example.com/?param=value");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 22.7K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/suggest/settings/ArraySettings.java

    import java.time.ZonedDateTime;
    import java.time.format.DateTimeFormatter;
    import java.util.Arrays;
    import java.util.Base64;
    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.CoreLibConstants;
    import org.codelibs.core.lang.StringUtil;
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Thu Aug 07 02:41:28 UTC 2025
    - 15.6K bytes
    - Viewed (0)
Back to top