- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 64 for extracted (0.9 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java
* @param outExt the output file extension * @return the extracted text content * @throws ExtractException if an error occurs while reading the file */ protected String getOutputContent(final File outputFile, final String outExt) { final Extractor extractor = getExtractor(outExt); if (extractor != null) { final Map<String, String> params = new HashMap<>();
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 10.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java
dfos = out; CopyUtil.copy(in, out); out.flush(); Extractor extractor = StringUtil.isBlank(mimeType) ? null : extractorFactory.getExtractor(mimeType); if (extractor == null) { final String detectedMimeType = getMimeType(out); extractor = extractorFactory.getExtractor(detectedMimeType);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
public void setMailProperties(final Properties mailProperties) { this.mailProperties = mailProperties; } /** * Extracts the body text from a MIME message. * * @param message the MIME message to extract text from * @return the extracted body text * @throws ExtractException if extraction fails */ protected String getBodyText(final MimeMessage message) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/PermissionHelper.java
this.userPrefix = userPrefix; } /** * Extracts role type information from SMB (Server Message Block) response data. * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs. * * @param responseData the response data containing SMB metadata * @return a list of role type strings extracted from the SMB permissions */
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Nov 13 05:54:52 UTC 2025 - 15.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/ExtractDataTest.java
} public void test_contentGetterSetter() { // Test content getter/setter ExtractData data = new ExtractData(); String content = "This is extracted content"; data.setContent(content); assertEquals(content, data.getContent()); String newContent = "New content"; data.setContent(newContent);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 9.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java
import org.xml.sax.InputSource; /** * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions. * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}. * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}. * <p>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 13.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java
logger.warn("Failed to close HTTP client for API extractor", e); } } } /** * Extracts text from the input stream using the API endpoint. * * @param in the input stream to extract text from * @param params additional parameters * @return the extracted data * @throws ExtractException if extraction fails */ @Override
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
throw new RobotsTxtException("Failed to parse robots.txt.", e); } } /** * Extracts the value from a line using the given pattern. * @param pattern the pattern to match against * @param line the line to extract the value from * @return the extracted value, or null if no match */ protected String getValue(final Pattern pattern, final String line) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 14 12:52:01 UTC 2025 - 11.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content and metadata from CSV files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul> * <li>Automatic delimiter detection (comma, tab, semicolon, pipe)</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0)