- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 353 for extracted (0.32 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 5.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java
*/ public Set<String> getKeySet() { return metadata.keySet(); } /** * Gets the extracted content. * * @return the extracted content */ public String getContent() { return content; } /** * Sets the extracted content. * * @param content the content to set */ public void setContent(final String content) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 3.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
* optionally override the default weight value. */ public interface Extractor { /** * Extracts text data from the given input stream. * * @param in the input stream to extract text from * @param params a map of parameters to be used during extraction * @return an ExtractData object containing the extracted text */ ExtractData getText(InputStream in, Map<String, String> params); /**
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
protected int maxDepth = 10; /** Separator for key-value pairs in extracted text. */ protected String fieldSeparator = ": "; /** Separator between different fields in extracted text. */ protected String lineSeparator = "\n"; /** Whether to extract top-level fields as metadata. */ protected boolean extractMetadata = true; /** Prefix for flattened nested keys. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
docs/debugging/inspect/decrypt-v2.go
} sr.ReturnNonDecryptable(true) // Debug corrupted streams. if false { sr.SkipEncrypted(true) return sr.DebugStream(os.Stdout) } extracted := false for { stream, err := sr.NextStream() if err != nil { if err == io.EOF { if extracted { return nil } return errors.New("no data found on stream") } if errors.Is(err, estream.ErrNoKey) {
Registered: Sun Dec 28 19:28:13 UTC 2025 - Last Modified: Mon Feb 17 17:09:42 UTC 2025 - 2.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
throw new ExtractException(e); } } return encoding; } /** * Extracts text content from the given content by removing tags and processing attributes. * @param content The content to extract from. * @return The extracted text. */ protected String extractString(final String content) { String input = content.replaceAll("[\\r\\n]", " ");
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 8.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import jakarta.annotation.Resource; /** * TextTransformer is a class that transforms a ResponseData object into a ResultData object containing the extracted text content. * It uses an Extractor to extract the text from the response body based on the MIME type.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java
* @param outExt the output file extension * @return the extracted text content * @throws ExtractException if an error occurs while reading the file */ protected String getOutputContent(final File outputFile, final String outExt) { final Extractor extractor = getExtractor(outExt); if (extractor != null) { final Map<String, String> params = new HashMap<>();
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 10.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java
/** * Creates a new MsVisioExtractor instance. */ public MsVisioExtractor() { super(); } /** * Extracts text from the Visio input stream. * @param in The input stream. * @param params The parameters. * @return The extracted data. */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (in == null) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 1.9K bytes - Viewed (0)