- Sort Score
- Result 10 results
- Languages All
Results 41 - 50 of 64 for mhtml (0.01 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java
import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.InputSource; /** * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions. * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}. * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}. * <p>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 13.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java
transformer.addTransformationRule("\\s+", " "); // Normalize whitespace // Simulate crawling response ResponseData responseData = new ResponseData(); responseData.setUrl("http://example.com/page.html"); responseData.setParentUrl("http://example.com/"); responseData.setResponseBody("<html><body> Test Content </body></html>".getBytes());
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 28K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/extractor.xml
<postConstruct name="addExtractor"> <arg>[ "application/xml", "application/xhtml+xml", "application/rdf+xml", "application/x-freemind", "text/xml" ]</arg> <arg>xmlExtractor</arg> </postConstruct> <postConstruct name="addExtractor"> <arg>[ "text/html" ]</arg> <arg>htmlExtractor</arg> </postConstruct> <postConstruct name="addExtractor"> <arg>[
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Aug 01 21:40:30 UTC 2020 - 49K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/RegexRule.java
* rule.addRule("url", "https://example.com/.*"); * rule.addRule("contentType", "text/html"); * rule.setAllRequired(true); // Both URL and content type must match * * ResponseData responseData = new ResponseData(); * responseData.setUrl("https://example.com/page1"); * responseData.setContentType("text/html"); * * boolean matches = rule.match(responseData); // Returns true * } * </pre> * */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractorTest.java
LhaExtractor lhaExtractor = container.getComponent("lhaExtractor"); factory.addExtractor("text/plain", tikaExtractor); factory.addExtractor("text/html", tikaExtractor); factory.addExtractor("application/x-lha", lhaExtractor); })// ; lhaExtractor = container.getComponent("lhaExtractor"); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java
assertTrue(urlFilter.match("https://example.com/page.html")); assertTrue(urlFilter.match("https://example.com/document.pdf")); assertFalse(urlFilter.match("https://example.com/image.jpg")); assertFalse(urlFilter.match("https://example.com/admin/dashboard")); assertFalse(urlFilter.match("https://other.com/page.html")); } /** * Test match with no patterns configured
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Wed Sep 03 14:42:53 UTC 2025 - 19K bytes - Viewed (0) -
fess-crawler/src/test/resources/extractor/tar/test.tar
data/folder/file.html テスト data/file.txt テキスト...
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 10K bytes - Viewed (0) -
fess-crawler/src/test/resources/extractor/gz/test.tar.gz
test.tar data/folder/file.html テスト data/file.txt テキスト...
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 351 bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractorTest.java
ZipExtractor zipExtractor = container.getComponent("zipExtractor"); factory.addExtractor("text/plain", tikaExtractor); factory.addExtractor("text/html", tikaExtractor); factory.addExtractor("application/zip", zipExtractor); })// ; zipExtractor = container.getComponent("zipExtractor"); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java
TarExtractor tarExtractor = container.getComponent("tarExtractor"); factory.addExtractor("text/plain", tikaExtractor); factory.addExtractor("text/html", tikaExtractor); factory.addExtractor("application/tar", tarExtractor); })// ; tarExtractor = container.getComponent("tarExtractor"); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.7K bytes - Viewed (0)