- Sort Score
- Num 10 results
- Language All
Results 1 - 2 of 2 for addExtractor (0.06 seconds)
-
fess-crawler-lasta/src/main/resources/crawler/extractor.xml
<postConstruct name="addExtractor"> <arg>[ "application/pdf" ]</arg> <arg>pdfExtractor</arg> </postConstruct> <postConstruct name="addExtractor"> <arg>[ "application/x-lha", "application/x-lharc" ]</arg> <arg>lhaExtractor</arg> </postConstruct> <postConstruct name="addExtractor"> <arg>[ "message/rfc822" ]</arg> <arg>emlExtractor</arg>
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Wed Feb 11 01:15:55 GMT 2026 - 50.4K bytes - Click Count (0) -
README.md
// Configure content extraction container.singleton("tikaExtractor", TikaExtractor.class); container.singleton("extractorFactory", ExtractorFactory.class, factory -> { factory.addExtractor("text/html", container.getComponent("tikaExtractor")); factory.addExtractor("application/pdf", container.getComponent("tikaExtractor")); }); Crawler crawler = container.getComponent("crawler"); crawler.addUrl("https://example.com");
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Sun Aug 31 05:32:52 GMT 2025 - 15.3K bytes - Click Count (0)