- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 84 for EXTRACT (0.04 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
/** Default encoding for Markdown files. */ protected String encoding = Constants.UTF_8; /** Whether to extract front matter as metadata. */ protected boolean extractFrontMatter = true; /** Whether to extract headings as metadata. */ protected boolean extractHeadings = true; /** Whether to extract link URLs as metadata. */ protected boolean extractLinks = false; /** Markdown parser with extensions. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
} return field; } /** * Builds the extract data from headers and rows. * * @param headers the header row * @param rows the data rows * @return the extract data */ protected ExtractData buildExtractData(final String[] headers, final List<String[]> rows) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
protected String lineSeparator = "\n"; /** Whether to extract top-level fields as metadata. */ protected boolean extractMetadata = true; /** Prefix for flattened nested keys. */ protected String nestedKeySeparator = "."; /** Maximum number of array elements to extract. */ protected int maxArrayElements = 100; /** * Constructs a new JsonExtractor.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
tests/test_tutorial/test_body/test_tutorial001.py
Registered: Sun Dec 28 07:19:09 UTC 2025 - Last Modified: Sat Dec 27 18:19:10 UTC 2025 - 10.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files. * This extractor can extract text content from files within LHA archives * by using appropriate extractors for each contained file type. * * @author shinsuke */ public class LhaExtractor extends AbstractExtractor {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 5.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
/** * Constructs a new AbstractXmlExtractor. */ public AbstractXmlExtractor() { // NOP } /** * Returns the pattern used to extract encoding information from content. * @return The encoding pattern. */ protected abstract Pattern getEncodingPattern(); /** * Returns the pattern used to identify tags in the content.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 8.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. * * <p>The extractor runs text extraction in a separate thread with a configurable timeout * to prevent hanging on problematic PDF files. It also extracts metadata from the PDF
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java
assertTrue("Error message should contain the specific encoding", e.getMessage().contains(customEncoding)); assertTrue("Error message should indicate extraction failure", e.getMessage().contains("Failed to extract")); } finally { // Reset to default encoding textExtractor.setEncoding("UTF-8"); } } /** * Test extraction with empty input stream. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 8.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/PermissionHelper.java
import org.codelibs.jcifs.smb.SID; import jakarta.annotation.Resource; /** * Helper class for handling permission-related operations in Fess. * Provides functionality to encode/decode permission strings and extract * role type information from various file system protocols (SMB, file, FTP). */ public class PermissionHelper { /** Logger instance for this class */
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Nov 13 05:54:52 UTC 2025 - 15.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ExtractorResourceManagementTest.java
// ExtractException with improved message assertTrue("Error message should contain context about Word document or extraction", e.getMessage().contains("Word") || e.getMessage().contains("extract")); } catch (final RuntimeException e) { // POI may throw IllegalArgumentException or other RuntimeExceptions // for invalid data, which is also acceptable
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 10.4K bytes - Viewed (0)