- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 29 for ExtractData (0.06 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java
protected String content; /** * Constructs a new ExtractData. */ public ExtractData() { // Default constructor } /** * Constructs a new ExtractData with the specified content. * * @param content the content to set */ public ExtractData(final String content) { this.content = content; } /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 3.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
throw exceptionSet.iterator().next(); } writer.flush(); final ExtractData extractData = new ExtractData(writer.toString()); extractMetadata(document, extractData); return extractData; } catch (final Exception e) { throw new ExtractException(e); } } /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java
final ExtractData extractData = tikaExtractor.getText(in, null); final String content = extractData.getContent(); CloseableUtil.closeQuietly(in); logger.info(content); assertTrue(content.contains("ใในใ")); for (final String key : extractData.getKeySet()) { logger.info("{}={}", key, String.join("|", extractData.getValues(key))); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 30.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java
* Extracts the filename from the parameters. * @param in The input stream (not used). * @param params The parameters, expected to contain ExtractData.RESOURCE_NAME_KEY. * @return An ExtractData object containing the filename as content. */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (in == null) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 25.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
try { final ExtractData extractData = new ExtractData( StreamUtil.stream(getStringsByXPath(document, contentXpath)).get(stream -> stream.collect(Collectors.joining(" ")))); metadataXpathMap.entrySet().stream().forEach(e -> { extractData.putValues(e.getKey(), getStringsByXPath(document, e.getValue())); }); return extractData; } finally {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 9.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java
import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.hslf.usermodel.HSLFTextParagraph; import org.apache.poi.sl.extractor.SlideShowExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content from Microsoft PowerPoint documents. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java
*/ public TextExtractor() { super(); } @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (in == null) { throw new CrawlerSystemException("The inputstream is null."); } try { return new ExtractData(new String(InputStreamUtil.getBytes(in), getEncoding())); } catch (final Exception e) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java
* <pre> * {@code * try (InputStream in = new FileInputStream("example.pdf")) { * ExtractData extractData = new ExtractorBuilder(crawlerContainer, in, new HashMap<>()) * .mimeType("application/pdf") * .filename("example.pdf") * .maxContentLength(1024 * 1024) * .extract(); * * String content = extractData.getContent(); * // Process the extracted content * } catch (IOException e) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
executeCommand(inputFile, outputFile); final ExtractData extractData = new ExtractData(new String(FileUtil.readBytes(outputFile), outputEncoding)); if (StringUtil.isNotBlank(resourceName)) { extractData.putValues("resourceName", new String[] { resourceName }); } return extractData; } catch (final IOException e) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 16K bytes - Viewed (0)