- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 98 for Extract (1.07 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
/** Default encoding for Markdown files. */ protected String encoding = Constants.UTF_8; /** Whether to extract front matter as metadata. */ protected boolean extractFrontMatter = true; /** Whether to extract headings as metadata. */ protected boolean extractHeadings = true; /** Whether to extract link URLs as metadata. */ protected boolean extractLinks = false; /** Markdown parser with extensions. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
protected String lineSeparator = "\n"; /** Whether to extract top-level fields as metadata. */ protected boolean extractMetadata = true; /** Prefix for flattened nested keys. */ protected String nestedKeySeparator = "."; /** Maximum number of array elements to extract. */ protected int maxArrayElements = 100; /** * Constructs a new JsonExtractor.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
} return field; } /** * Builds the extract data from headers and rows. * * @param headers the header row * @param rows the data rows * @return the extract data */ protected ExtractData buildExtractData(final String[] headers, final List<String[]> rows) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0) -
tests/test_tutorial/test_body/test_tutorial001.py
Registered: Sun Dec 28 07:19:09 UTC 2025 - Last Modified: Sat Dec 27 18:19:10 UTC 2025 - 10.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. * * <p>The extractor runs text extraction in a separate thread with a configurable timeout * to prevent hanging on problematic PDF files. It also extracts metadata from the PDF * document and includes it in the extraction result. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
throw new ExtractException(e); } } return encoding; } /** * Extracts text content from the given content by removing tags and processing attributes. * @param content The content to extract from. * @return The extracted text. */ protected String extractString(final String content) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 8.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files. * This extractor can extract text content from files within LHA archives * by using appropriate extractors for each contained file type. * * @author shinsuke */ public class LhaExtractor extends AbstractExtractor {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 5.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/PermissionHelper.java
*/ public void setUserPrefix(final String userPrefix) { this.userPrefix = userPrefix; } /** * Extracts role type information from SMB (Server Message Block) response data. * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs. * * @param responseData the response data containing SMB metadata
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Nov 13 05:54:52 UTC 2025 - 15.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java
assertTrue("Error message should mention TAR archive or extraction failure", e.getMessage().contains("TAR") || e.getMessage().contains("extract")); } } /** * Test that ZipExtractor successfully extracts from valid archive. */ public void test_ZipExtractor_validArchive_extractsSuccessfully() {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java
} catch (final Exception e) { failedEntries++; if (logger.isDebugEnabled()) { logger.debug("Failed to extract content from archive entry: {}", filename, e); } } } } } } catch (final MaxLengthExceededException e) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 4.8K bytes - Viewed (0)