Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 11 - 20 of 49 for ExtractData (0.13 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractorTest.java

            extractor.command = getCommand(scriptFile);
            final Map<String, String> params = new HashMap<String, String>();
            params.put(ExtractData.RESOURCE_NAME_KEY, "hoge/fuga.txt");
            final ExtractData text = extractor.getText(new FileInputStream(contentFile), params);
            assertEquals(content, text.getContent());
        }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 9.8K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hdgf.extractor.VisioTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets a text from a Visio file.
     *
     * @author shinsuke
     *
     */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractorEnhancedTest.java

            final InputStream in = new ByteArrayInputStream(new byte[0]);
            final Map<String, String> params = new HashMap<>();
            params.put(ExtractData.RESOURCE_NAME_KEY, "test-document.pdf");
    
            final ExtractData result = filenameExtractor.getText(in, params);
    
            assertNotNull(result);
            assertEquals("test-document.pdf", result.getContent());
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 7K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

                final ExtractData extractData = new ExtractData(plainText);
    
                // Extract front matter metadata
                if (extractFrontMatter) {
                    extractFrontMatterMetadata(document, extractData);
                }
    
                // Extract headings
                if (extractHeadings) {
                    extractHeadingMetadata(document, extractData);
                }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java

                final ExtractData extractData = new ExtractData(textBuilder.toString().trim());
    
                if (extractMetadata) {
                    for (final Map.Entry<String, List<String>> entry : metadataMap.entrySet()) {
                        final List<String> values = entry.getValue();
                        extractData.putValues(entry.getKey(), values.toArray(new String[0]));
                    }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 9.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

                throw new ExtractException(e);
            }
        }
    
        /**
         * Creates an ExtractData object from the extracted content.
         * @param content The extracted content.
         * @return The ExtractData object.
         */
        protected ExtractData createExtractData(final String content) {
            return new ExtractData(extractString(content));
        }
    
        /**
         * Detects the encoding of the input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 8.6K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractorTest.java

            final ExtractData extractData = jsonExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            // Verify no metadata extracted
            assertNull(extractData.getValues("title"));
            assertNull(extractData.getValues("author"));
    
            // But content should still be present
            final String content = extractData.getContent();
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 4.7K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractorTest.java

            final ExtractData extractData = csvExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            // Verify no column metadata
            assertNull(extractData.getValues("columns"));
    
            // But content should still be present
            final String content = extractData.getContent();
            assertTrue(content.contains("John Doe"));
        }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 5.3K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            validateInputStream(in);
            try (final HSLFSlideShow slideShow = new HSLFSlideShow(in);
                    final SlideShowExtractor<HSLFShape, HSLFTextParagraph> extractor = new SlideShowExtractor<>(slideShow)) {
                return new ExtractData(extractor.getText());
            } catch (final IOException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 2K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

         */
        public TextExtractor() {
            super();
        }
    
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            validateInputStream(in);
            try {
                final String content = new String(InputStreamUtil.getBytes(in), getEncoding());
                return new ExtractData(content);
            } catch (final Exception e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 2K bytes
    - Viewed (0)
Back to top