- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 639 for extract (0.04 sec)
-
.github/workflows/extract-unit-test-split.jq
Stefan Wolf <******@****.***> 1633938695 +0200
Registered: Wed Dec 31 11:36:14 UTC 2025 - Last Modified: Mon Oct 11 08:08:26 UTC 2021 - 171 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
/** Default encoding for Markdown files. */ protected String encoding = Constants.UTF_8; /** Whether to extract front matter as metadata. */ protected boolean extractFrontMatter = true; /** Whether to extract headings as metadata. */ protected boolean extractHeadings = true; /** Whether to extract link URLs as metadata. */ protected boolean extractLinks = false; /** Markdown parser with extensions. */Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java
dfos = out; CopyUtil.copy(in, out); out.flush(); Extractor extractor = StringUtil.isBlank(mimeType) ? null : extractorFactory.getExtractor(mimeType); if (extractor == null) { final String detectedMimeType = getMimeType(out); extractor = extractorFactory.getExtractor(detectedMimeType);Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
protected int maxDepth = 10; /** Separator for key-value pairs in extracted text. */ protected String fieldSeparator = ": "; /** Separator between different fields in extracted text. */ protected String lineSeparator = "\n"; /** Whether to extract top-level fields as metadata. */ protected boolean extractMetadata = true; /** Prefix for flattened nested keys. */Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
} return field; } /** * Builds the extract data from headers and rows. * * @param headers the header row * @param rows the data rows * @return the extract data */ protected ExtractData buildExtractData(final String[] headers, final List<String[]> rows) {Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
* optionally override the default weight value. */ public interface Extractor { /** * Extracts text data from the given input stream. * * @param in the input stream to extract text from * @param params a map of parameters to be used during extraction * @return an ExtractData object containing the extracted text */ ExtractData getText(InputStream in, Map<String, String> params); /**Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
public void setMailProperties(final Properties mailProperties) { this.mailProperties = mailProperties; } /** * Extracts the body text from a MIME message. * * @param message the MIME message to extract text from * @return the extracted body text * @throws ExtractException if extraction fails */ protected String getBodyText(final MimeMessage message) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
throw new ExtractException(e); } } return encoding; } /** * Extracts text content from the given content by removing tags and processing attributes. * @param content The content to extract from. * @return The extracted text. */ protected String extractString(final String content) { String input = content.replaceAll("[\\r\\n]", " ");
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 8.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 5.9K bytes - Viewed (0)