Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 16 for ExtractData (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                    throw exceptionSet.iterator().next();
                }
                writer.flush();
                final ExtractData extractData = new ExtractData(writer.toString());
                extractMetadata(document, extractData);
                return extractData;
            } catch (final Exception e) {
                throw new ExtractException(e);
            }
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            final ExtractData extractData = tikaExtractor.getText(in, null);
            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
            for (final String key : extractData.getKeySet()) {
                logger.info("{}={}", key, String.join("|", extractData.getValues(key)));
            }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/ExtractDataTest.java

        }
    
        public void test_usingPredefinedConstants() {
            // Test using predefined constants
            ExtractData data = new ExtractData();
    
            data.putValue(ExtractData.RESOURCE_NAME_KEY, "test.pdf");
            data.putValue(ExtractData.URL, "https://example.com/test.pdf");
            data.putValues(ExtractData.FILE_PASSWORDS, new String[] { "pass1", "pass2" });
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 9.9K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("ExtractData: {}", extractData);
                }
                // meta
                extractData.getKeySet().stream().filter(k -> extractData.getValues(k) != null).forEach(key -> {
                    final String[] values = extractData.getValues(key);
                    metaDataMap.put(key, values);
    
                    // meta -> content
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 25.7K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

                executeCommand(inputFile, outputFile);
    
                final ExtractData extractData = new ExtractData(new String(FileUtil.readBytes(outputFile), outputEncoding));
                if (StringUtil.isNotBlank(resourceName)) {
                    extractData.putValues("resourceName", new String[] { resourceName });
                }
    
                return extractData;
            } catch (final IOException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 16.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

                }
            }
    
            final ExtractData extractData = new ExtractData(textBuilder.toString().trim());
    
            // Add column metadata
            if (extractColumnMetadata && headers != null) {
                extractData.putValues("columns", headers);
                extractData.putValue("column_count", String.valueOf(headers.length));
            }
    
            extractData.putValue("row_count", String.valueOf(rows.size()));
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

                final ExtractData extractData = new ExtractData(getOutputContent(outputFile, outExt));
                if (StringUtil.isNotBlank(resourceName)) {
                    extractData.putValues("resourceName", new String[] { resourceName });
                }
    
                return extractData;
            } catch (final OfficeException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

     * <pre>
     * {@code
     * try (InputStream in = new FileInputStream("example.pdf")) {
     *     ExtractData extractData = new ExtractorBuilder(crawlerContainer, in, new HashMap<>())
     *         .mimeType("application/pdf")
     *         .filename("example.pdf")
     *         .maxContentLength(1024 * 1024)
     *         .extract();
     *
     *     String content = extractData.getContent();
     *     // Process the extracted content
     * } catch (IOException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

                    }
                    final ExtractData extractData = new ExtractData(content);
                    final long contentLength;
                    if (isByteStream) {
                        contentLength = ((ByteArrayInputStream) inputStream).available();
                    } else {
                        contentLength = tempFile != null ? tempFile.length() : 0;
                    }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 30.8K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java

    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.ResourceUtil;
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.6K bytes
    - Viewed (0)
Back to top