- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 152 for extracted (1.44 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java
*/ public Set<String> getKeySet() { return metadata.keySet(); } /** * Gets the extracted content. * * @return the extracted content */ public String getContent() { return content; } /** * Sets the extracted content. * * @param content the content to set */ public void setContent(final String content) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 3.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 5.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
* optionally override the default weight value. */ public interface Extractor { /** * Extracts text data from the given input stream. * * @param in the input stream to extract text from * @param params a map of parameters to be used during extraction * @return an ExtractData object containing the extracted text */ ExtractData getText(InputStream in, Map<String, String> params); /**
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import jakarta.annotation.Resource; /** * TextTransformer is a class that transforms a ResponseData object into a ResultData object containing the extracted text content. * It uses an Extractor to extract the text from the response body based on the MIME type.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java
* @param outExt the output file extension * @return the extracted text content * @throws ExtractException if an error occurs while reading the file */ protected String getOutputContent(final File outputFile, final String outExt) { final Extractor extractor = getExtractor(outExt); if (extractor != null) { final Map<String, String> params = new HashMap<>();
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 10.4K bytes - Viewed (0) -
src/test/java/jcifs/smb/compression/DefaultCompressionServiceTest.java
assertEquals(testData.length, writtenBytes); byte[] extracted = new byte[testData.length]; System.arraycopy(outputBuffer, 50, extracted, 0, testData.length); assertArrayEquals(testData, extracted); } @Test @DisplayName("Test small data handling") public void testSmallDataHandling() throws CIFSException {
Registered: Sat Dec 20 13:44:44 UTC 2025 - Last Modified: Sun Aug 31 08:00:57 UTC 2025 - 9.1K bytes - Viewed (0) -
fess-crawler/src/test/resources/extractor/markdown/test.md
--- title: Sample Markdown Document author: John Doe date: 2025-01-15 tags: - crawler - extractor - markdown --- # Introduction This is a sample Markdown document for testing the MarkdownExtractor. ## Features The extractor should handle: - YAML front matter extraction - Heading structure - **Bold text** and *italic text* - Lists and other formatting ### Code Examples
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 767 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java
* Creates a new MsPublisherExtractor instance. */ public MsPublisherExtractor() { super(); } /** * Extracts text from the Publisher input stream. * @param in The input stream. * @param params The parameters. * @return The extracted data. */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (in == null) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
protected int maxDepth = 10; /** Separator for key-value pairs in extracted text. */ protected String fieldSeparator = ": "; /** Separator between different fields in extracted text. */ protected String lineSeparator = "\n"; /** Whether to extract top-level fields as metadata. */ protected boolean extractMetadata = true; /** Prefix for flattened nested keys. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0)