- Sort Score
- Result 10 results
- Languages All
Results 11 - 20 of 158 for extractors (0.07 sec)
-
fess-crawler-lasta/src/main/resources/crawler/extractor.xml
class="org.codelibs.fess.crawler.extractor.impl.MsPowerPointExtractor" /> <component name="msPublisherExtractor" class="org.codelibs.fess.crawler.extractor.impl.MsPublisherExtractor" /> <component name="msVisioExtractor" class="org.codelibs.fess.crawler.extractor.impl.MsVisioExtractor" /> <component name="pdfExtractor" class="org.codelibs.fess.crawler.extractor.impl.PdfExtractor" /> <component name="lhaExtractor"
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 50.1K bytes - Viewed (0) -
src/main/resources/crawler/extractor+tikaExtractor.xml
"http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <component name="tikaExtractor" class="org.codelibs.fess.crawler.extractor.impl.TikaExtractor"> <property name="maxCompressionRatio">2</property> <property name="maxUncompressionSize">10000000</property> </component>
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Sun Aug 25 12:46:12 UTC 2019 - 461 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hslf.usermodel.HSLFShape; import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.hslf.usermodel.HSLFTextParagraph; import org.apache.poi.sl.extractor.SlideShowExtractor; import org.codelibs.fess.crawler.entity.ExtractData;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.Map; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts the filename from the parameters as the content. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Wed Nov 19 08:55:01 UTC 2025 - 2.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 1.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import jakarta.mail.Address; import jakarta.mail.BodyPart; import jakarta.mail.Header; import jakarta.mail.Message;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractor.java
validateInputStream(in); try (final HSSFWorkbook workbook = new HSSFWorkbook(in); final org.apache.poi.hssf.extractor.ExcelExtractor excelExtractor = new org.apache.poi.hssf.extractor.ExcelExtractor(workbook)) { return new ExtractData(excelExtractor.getText()); } catch (final IOException e) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 1.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java
super(); } /** * Extracts text from the Word input stream. * @param in The input stream. * @param params The parameters. * @return The extracted data. */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { validateInputStream(in);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Wed Nov 19 08:55:01 UTC 2025 - 1.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; /** * Extracts text content and metadata from JSON files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul> * <li>Structured text extraction with key-value pairs</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0)