Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 34 for ExtractData (0.06 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

        public static final String FILE_PASSWORDS = "file.passwords";
    
        protected Map<String, String[]> metadata = new HashMap<>();
    
        protected String content;
    
        public ExtractData() {
            // nothing
        }
    
        public ExtractData(final String content) {
            this.content = content;
        }
    
        public void putValues(final String key, final String[] values) {
            metadata.put(key, values);
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 2.5K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                    throw exceptionSet.iterator().next();
                }
                writer.flush();
                final ExtractData extractData = new ExtractData(writer.toString());
                extractMetadata(document, extractData);
                return extractData;
            } catch (final Exception e) {
                throw new ExtractException(e);
            }
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 9.8K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            final ExtractData extractData = tikaExtractor.getText(in, null);
            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
            for (final String key : extractData.getKeySet()) {
                logger.info("{}={}", key, String.join("|", extractData.getValues(key)));
            }
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 30.5K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JodExtractorTest.java

            Map<String, String> params = new HashMap<String, String>();
            params.put("resourceName", "test.odt");
            ExtractData extractData = jodExtractor.getText(in, params);
            String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 9.5K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

            final ExtractData extractData = pdfExtractor.getText(in, null);
            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
            assertEquals("Writer", extractData.getValues("Creator")[0]);
            assertEquals("OpenOffice.org 3.0", extractData.getValues("Producer")[0]);
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 7.6K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

            try {
                final ExtractData extractData = new ExtractData(
                        StreamUtil.stream(getStringsByXPath(document, contentXpath)).get(stream -> stream.collect(Collectors.joining(" "))));
                metadataXpathMap.entrySet().stream().forEach(e -> {
                    extractData.putValues(e.getKey(), getStringsByXPath(document, e.getValue()));
                });
                return extractData;
            } finally {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 6.9K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * @author shinsuke
     *
     */
    public class FilenameExtractor extends AbstractExtractor {
    
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.5K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("ExtractData: {}", extractData);
                }
                // meta
                extractData.getKeySet().stream().filter(k -> extractData.getValues(k) != null).forEach(key -> {
                    final String[] values = extractData.getValues(key);
                    metaDataMap.put(key, values);
    
                    // meta -> content
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Fri Oct 11 21:11:58 UTC 2024
    - 23.6K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractorTest.java

            extractor.command = getCommand(scriptFile);
            final Map<String, String> params = new HashMap<String, String>();
            params.put(ExtractData.RESOURCE_NAME_KEY, "hoge/fuga.txt");
            final ExtractData text = extractor.getText(new FileInputStream(contentFile), params);
            assertEquals(content, text.getContent());
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 9.8K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            if (in == null) {
                throw new CrawlerSystemException("The inputstream is null.");
            }
            try {
                @SuppressWarnings("resource")
                final VisioTextExtractor visioTextExtractor = new VisioTextExtractor(in);
                return new ExtractData(visioTextExtractor.getText());
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.7K bytes
    - Viewed (0)
Back to top