- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 337 for extractor (0.04 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
* optionally override the default weight value. */ public interface Extractor { /** * Extracts text data from the given input stream. * * @param in the input stream to extract text from * @param params a map of parameters to be used during extraction * @return an ExtractData object containing the extracted text */ ExtractData getText(InputStream in, Map<String, String> params); /**Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/extractor.xml
class="org.codelibs.fess.crawler.extractor.impl.MsPowerPointExtractor" /> <component name="msPublisherExtractor" class="org.codelibs.fess.crawler.extractor.impl.MsPublisherExtractor" /> <component name="msVisioExtractor" class="org.codelibs.fess.crawler.extractor.impl.MsVisioExtractor" /> <component name="pdfExtractor" class="org.codelibs.fess.crawler.extractor.impl.PdfExtractor" /> <component name="lhaExtractor"
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 50.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
*/ public Extractor[] getExtractors(final String key) { final Extractor[] extractors = extractorMap.get(key); if (extractors == null || extractors.length == 0) { return new Extractor[0]; } return extractors; } /** * Sets the extractor map with the provided map. *Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 7.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. * * <p>The extractor runs text extraction in a separate thread with a configurable timeout
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 5.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java
dfos = out; CopyUtil.copy(in, out); out.flush(); Extractor extractor = StringUtil.isBlank(mimeType) ? null : extractorFactory.getExtractor(mimeType); if (extractor == null) { final String detectedMimeType = getMimeType(out); extractor = extractorFactory.getExtractor(detectedMimeType);Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java
assertContentType("application/pdf", "extractor/test.pdf", "hoge.pdf"); assertContentType("application/gzip", "extractor/gz/test.tar.gz", "hoge.tar.gz"); assertContentType("application/zip", "extractor/zip/test.zip", "hoge.zip"); assertContentType("application/x-lharc", "extractor/lha/test.lzh", "hoge.lzh"); // TODO is it correct? assertContentType("application/xml", "extractor/test.mm", "hoge.mm");
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 11.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ExtractorResourceManagementTest.java
final MsExcelExtractor extractor = container.getComponent("msExcelExtractor"); final AtomicBoolean streamClosed = new AtomicBoolean(false); try (final InputStream originalStream = ResourceUtil.getResourceAsStream("extractor/msoffice/test.xls")) { final InputStream trackableStream = createTrackableInputStream(originalStream, streamClosed); final ExtractData result = extractor.getText(trackableStream, null);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 10.4K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java
throw new FessSystemException("Could not find extractorFactory."); } final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType()); if (logger.isDebugEnabled()) { logger.debug("url={}, extractor={}", responseData.getUrl(), extractor); } return extractor; }
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Nov 28 16:29:12 UTC 2025 - 3.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
this.mailProperties = mailProperties; } /** * Extracts the body text from a MIME message. * * @param message the MIME message to extract text from * @return the extracted body text * @throws ExtractException if extraction fails */ protected String getBodyText(final MimeMessage message) {Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0)