Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 35 for ExtractData (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

        protected String content;
    
        /**
         * Constructs a new ExtractData.
         */
        public ExtractData() {
            // Default constructor
        }
    
        /**
         * Constructs a new ExtractData with the specified content.
         *
         * @param content the content to set
         */
        public ExtractData(final String content) {
            this.content = content;
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                    throw exceptionSet.iterator().next();
                }
                writer.flush();
                final ExtractData extractData = new ExtractData(writer.toString());
                extractMetadata(document, extractData);
                return extractData;
            } catch (final Exception e) {
                throw new ExtractException(e);
            }
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            final ExtractData extractData = tikaExtractor.getText(in, null);
            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
            for (final String key : extractData.getKeySet()) {
                logger.info("{}={}", key, String.join("|", extractData.getValues(key)));
            }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractorTest.java

            extractor.command = getCommand(scriptFile);
            final Map<String, String> params = new HashMap<String, String>();
            params.put(ExtractData.RESOURCE_NAME_KEY, "hoge/fuga.txt");
            final ExtractData text = extractor.getText(new FileInputStream(contentFile), params);
            assertEquals(content, text.getContent());
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 9.8K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

    import org.apache.poi.hslf.usermodel.HSLFSlideShow;
    import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
    import org.apache.poi.sl.extractor.SlideShowExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from Microsoft PowerPoint documents.
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

         */
        public TextExtractor() {
            super();
        }
    
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            if (in == null) {
                throw new CrawlerSystemException("The inputstream is null.");
            }
            try {
                return new ExtractData(new String(InputStreamUtil.getBytes(in), getEncoding()));
            } catch (final Exception e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

         * Extracts the filename from the parameters.
         * @param in The input stream (not used).
         * @param params The parameters, expected to contain ExtractData.RESOURCE_NAME_KEY.
         * @return An ExtractData object containing the filename as content.
         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            if (in == null) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java

    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.CopyUtil;
    import org.codelibs.core.io.FileUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 5.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

                executeCommand(inputFile, outputFile);
    
                final ExtractData extractData = new ExtractData(new String(FileUtil.readBytes(outputFile), outputEncoding));
                if (StringUtil.isNotBlank(resourceName)) {
                    extractData.putValues("resourceName", new String[] { resourceName });
                }
    
                return extractData;
            } catch (final IOException e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 16K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JodExtractorTest.java

            Map<String, String> params = new HashMap<String, String>();
            params.put("resourceName", "test.odt");
            ExtractData extractData = jodExtractor.getText(in, params);
            String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 9.5K bytes
    - Viewed (0)
Back to top