- Sort Score
- Result 10 results
- Languages All
Results 11 - 20 of 32 for m_regex (0.1 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.util.regex.Pattern; /** * Extracts text content from XML documents. */ public class XmlExtractor extends AbstractXmlExtractor { /** * Creates a new XmlExtractor instance. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/SitemapsRule.java
* represents a valid sitemap. It uses a SitemapsHelper to validate the response body as an InputStream. * The rule checks if the URL matches the defined regex pattern and then validates the content as a sitemap. * If any exception occurs during the sitemap validation, it logs the error and returns false. * */ public class SitemapsRule extends RegexRule { /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml
<match value="[0-9]{5,5}" type="regex" offset="0"> <match value="45" type="string" offset="20"> <!-- bibliographic --> <match value="[acdnp][acdefgijkmoprt][abcdims]" type="regex" offset="5"/> <!-- authority--> <match value="[acdnosx]z" type="regex" offset="5"/> <!-- holdings --> <match value="[cdn][uvxy]" type="regex" offset="5"/> <!-- classification -->
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Mar 13 08:18:01 UTC 2025 - 320.1K bytes - Viewed (1) -
README.md
- **Extensible Architecture**: Plugin system for custom extractors, transformers, and clients - **Rate Limiting**: Politeness policies and interval controllers - **URL Filtering**: Regex-based inclusion/exclusion patterns - **Data Persistence**: Multiple backend options including OpenSearch integration ## Technology Stack - **Java**: 21+ (requires Java 21 or higher) - **Build System**: Maven 3.x
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/service/UrlFilterService.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.service; import java.util.List; import java.util.regex.Pattern; /** * Service interface for managing URL filters. * Provides methods to add and remove include/exclude URL filters, * as well as retrieve the patterns of these filters. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/SitemapsRuleTest.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.rule.impl; import java.io.File; import java.io.InputStream; import java.util.regex.Pattern; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil; import org.codelibs.fess.crawler.container.StandardCrawlerContainer; import org.codelibs.fess.crawler.entity.ResponseData;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 4.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.ByteOrderMark; import org.apache.commons.io.input.BOMInputStream; import org.apache.commons.text.translate.AggregateTranslator;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 8.5K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/RegexRuleTest.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.rule.impl; import java.util.regex.Pattern; import org.codelibs.fess.crawler.entity.ResponseData; import org.dbflute.utflute.core.PlainTestCase; /** * @author shinsuke * */ public class RegexRuleTest extends PlainTestCase {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 4.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlFilterServiceImpl.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.service.impl; import java.util.List; import java.util.regex.Pattern; import org.codelibs.fess.crawler.helper.MemoryDataHelper; import org.codelibs.fess.crawler.service.UrlFilterService; import jakarta.annotation.Resource; /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java
import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.xpath.XPathEvaluationResult; import javax.xml.xpath.XPathException; import javax.xml.xpath.XPathNodes; import org.apache.logging.log4j.LogManager;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 13.1K bytes - Viewed (0)