- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 19 for Extract (0.03 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java
/** * Sets the MIME type of the content to extract. * * @param mimeType the MIME type to set * @return this builder instance for method chaining */ public ExtractorBuilder mimeType(final String mimeType) { this.mimeType = mimeType; return this; } /** * Sets the filename of the content to extract. * * @param filename the filename to set
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java
} return null; } /** * Extracts URLs from HTML tag attributes using XPath. * * @param url the base URL for resolving relative URLs * @param document the document to extract URLs from * @param xpath the XPath expression to select elements * @param attr the attribute name to extract URLs from * @param encoding the character encoding to use
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 28.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.fess.crawler.helper.MimeTypeHelper; /** * PdfExtractor extracts text content from PDF files using Apache PDFBox. * It supports password-protected PDFs and can extract embedded documents and annotations. * * <p>The extractor runs text extraction in a separate thread with a configurable timeout * to prevent hanging on problematic PDF files. It also extracts metadata from the PDF * document and includes it in the extraction result. *
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
} catch (final MessagingException e) { throw new ExtractException(e); } } /** * Puts a value into the extract data with appropriate type conversion. * * @param data the extract data to store the value in * @param key the key for the value * @param value the value to store */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java
} return extractData; } catch (final OfficeException e) { throw new ExtractException("Could not extract a content.", e); } finally { FileUtil.deleteInBackground(inputFile); FileUtil.deleteInBackground(outputFile); } } /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java
} catch (final IOException e) { logger.error("Failed to close httpClient.", e); } } } /** * Extracts text from the input stream using the API endpoint. * * @param in the input stream to extract text from * @param params additional parameters * @return the extracted data * @throws ExtractException if extraction fails */ @Override
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 12.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
* using the Apache Tika library. It extends {@link PasswordBasedExtractor} to handle password-protected files. * </p> * * <p> * This class provides methods to extract text from an input stream, handling different scenarios such as: * </p> * <ul> * <li>Normalizing text content</li> * <li>Handling resource names and content types</li>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 30.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/net/protocol/storage/Handler.java
private StatObjectResponse statObject; /** * Constructs a new StorageURLConnection for the specified URL. * This constructor parses the URL to extract bucket and object names. * * @param url The storage URL to connect to */ protected StorageURLConnection(final URL url) { super(url);
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/form/FormScheme.java
logger.debug("Logged in {}", originalLoginUrl); } }); } /** * Parses the token page and extracts token information. * @param tokenPattern The regex pattern to extract the token. * @param responseParams The list to store response parameters. * @param entity The HTTP entity containing the token page content. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14.3K bytes - Viewed (1) -
src/main/java/org/codelibs/fess/suggest/util/SuggestUtil.java
} } return keywords.toArray(new String[keywords.size()]); } /** * Extracts keywords from the given query string based on the specified fields. * * @param q the query string to parse and extract keywords from * @param fields the fields to consider when extracting keywords * @return a list of unique keywords extracted from the query string
Registered: Fri Sep 19 09:08:11 UTC 2025 - Last Modified: Mon Sep 01 13:33:03 UTC 2025 - 17.4K bytes - Viewed (0)