Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 8 of 8 for textExtractor (0.08 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

    /**
     * Extracts text content from an input stream as plain text.
     */
    public class TextExtractor extends AbstractExtractor {
    
        /**
         * The encoding for text.
         */
        protected String encoding = Constants.UTF_8;
    
        /**
         * Creates a new TextExtractor instance.
         */
        public TextExtractor() {
            super();
        }
    
        @Override
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 2K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java

            final String originalEncoding = textExtractor.getEncoding();
            assertEquals("UTF-8", originalEncoding);
    
            textExtractor.setEncoding("ISO-8859-1");
            assertEquals("ISO-8859-1", textExtractor.getEncoding());
    
            // Reset
            textExtractor.setEncoding("UTF-8");
            assertEquals("UTF-8", textExtractor.getEncoding());
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorTest.java

        public TextExtractor textExtractor;
    
        @Override
        protected void setUp() throws Exception {
            super.setUp();
            StandardCrawlerContainer container = new StandardCrawlerContainer().singleton("textExtractor", TextExtractor.class);
            textExtractor = container.getComponent("textExtractor");
        }
    
        public void test_getText() {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 2K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ExtractorResourceManagementTest.java

            }
        }
    
        /**
         * Test that TextExtractor throws exception with encoding information.
         */
        public void test_TextExtractor_includesEncodingInErrorMessage() {
            final TextExtractor extractor = container.getComponent("textExtractor");
            // Create a stream that will cause an encoding error
            final InputStream errorStream = new InputStream() {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java

                        final TextExtractor textExtractor = container.getComponent("textExtractor");
                        final ZipExtractor zipExtractor = container.getComponent("zipExtractor");
                        final TarExtractor tarExtractor = container.getComponent("tarExtractor");
                        factory.addExtractor("text/plain", textExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.6K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/EXTRACTOR_TESTS_README.md

    - Improved error messages with context
    - Input validation using `validateInputStream()`
    
    **Covered Extractors**:
    - MsWordExtractor
    - MsExcelExtractor
    - MsPowerPointExtractor
    - TextExtractor
    
    **Test Count**: 8 tests
    
    **Key Scenarios**:
    - ✅ Successful extraction closes resources properly
    - ✅ Failed extraction includes descriptive error messages
    - ✅ Null input stream validation
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 5.7K bytes
    - Viewed (0)
  7. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    		class="org.codelibs.fess.crawler.extractor.impl.PdfExtractor" />
    	<component name="lhaExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.LhaExtractor" />
    	<component name="textExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.TextExtractor" />
    	<component name="htmlExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.HtmlExtractor">
    		<property name="featureMap">
    			<component class="java.util.LinkedHashMap">
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 50.1K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractorTest.java

            public void testValidateInputStream(final InputStream in) {
                validateInputStream(in);
            }
        }
    
        private TestExtractor extractor;
    
        @Override
        protected void setUp() throws Exception {
            super.setUp();
            extractor = new TestExtractor();
        }
    
        /**
         * Test that validateInputStream accepts non-null input streams.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 8.4K bytes
    - Viewed (0)
Back to top