Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 32 for ExtractData (0.3 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                    throw exceptionSet.iterator().next();
                }
                writer.flush();
                final ExtractData extractData = new ExtractData(writer.toString());
                extractMetadata(document, extractData);
                return extractData;
            } catch (final Exception e) {
                throw new ExtractException(e);
            }
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractorTest.java

            final ExtractData extractData = markdownExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            // Verify front matter metadata
            final String[] titles = extractData.getValues("frontmatter.title");
            assertNotNull(titles);
            assertEquals("Sample Markdown Document", titles[0]);
    
            final String[] authors = extractData.getValues("frontmatter.author");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 6.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

         * @param params The parameters map, expected to contain ExtractData.RESOURCE_NAME_KEY
         * @return An ExtractData object containing the filename as content, or empty string if not found
         * @throws CrawlerSystemException if the input stream is null
         * @throws ExtractException if an unexpected error occurs during extraction
         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 2.7K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/ExtractDataTest.java

        }
    
        public void test_usingPredefinedConstants() {
            // Test using predefined constants
            ExtractData data = new ExtractData();
    
            data.putValue(ExtractData.RESOURCE_NAME_KEY, "test.pdf");
            data.putValue(ExtractData.URL, "https://example.com/test.pdf");
            data.putValues(ExtractData.FILE_PASSWORDS, new String[] { "pass1", "pass2" });
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 9.9K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("ExtractData: {}", extractData);
                }
                // meta
                extractData.getKeySet().stream().filter(k -> extractData.getValues(k) != null).forEach(key -> {
                    final String[] values = extractData.getValues(key);
                    metaDataMap.put(key, values);
    
                    // meta -> content
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 25.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hdgf.extractor.VisioTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets text from a Visio file.
     *
     * @author shinsuke
     *
     */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractorEnhancedTest.java

            final InputStream in = new ByteArrayInputStream(new byte[0]);
            final Map<String, String> params = new HashMap<>();
            params.put(ExtractData.RESOURCE_NAME_KEY, "test-document.pdf");
    
            final ExtractData result = filenameExtractor.getText(in, params);
    
            assertNotNull(result);
            assertEquals("test-document.pdf", result.getContent());
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

                final ExtractData extractData = new ExtractData(plainText);
    
                // Extract front matter metadata
                if (extractFrontMatter) {
                    extractFrontMatterMetadata(document, extractData);
                }
    
                // Extract headings
                if (extractHeadings) {
                    extractHeadingMetadata(document, extractData);
                }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java

                final ExtractData extractData = new ExtractData(textBuilder.toString().trim());
    
                if (extractMetadata) {
                    for (final Map.Entry<String, List<String>> entry : metadataMap.entrySet()) {
                        final List<String> values = entry.getValue();
                        extractData.putValues(entry.getKey(), values.toArray(new String[0]));
                    }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 9.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

                throw new ExtractException(e);
            }
        }
    
        /**
         * Builds the {@link ExtractData} result for a piece of extracted content.
         * The content is first passed through {@code extractString}, and the
         * returned string becomes the payload of the new object.
         *
         * @param content The extracted content.
         * @return A new ExtractData object holding the processed content.
         */
        protected ExtractData createExtractData(final String content) {
            final String processed = extractString(content);
            return new ExtractData(processed);
        }
    
        /**
         * Detects the encoding of the input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 8.6K bytes
    - Viewed (0)
Back to top