Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 24 for AbstractExtractor (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractor.java

     * </p>
     *
     */
    public abstract class AbstractExtractor implements Extractor {
    
        /** The crawler container. */
        @Resource
        protected CrawlerContainer crawlerContainer;
    
        /**
         * Constructs a new AbstractExtractor.
         */
        public AbstractExtractor() {
            // NOP
        }
    
        /**
         * Registers this extractor with the ExtractorFactory.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 4.6K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractorTest.java

    /**
     * Test class for AbstractExtractor base functionality.
     * Focuses on testing the validateInputStream() method and other common functionality.
     */
    public class AbstractExtractorTest extends PlainTestCase {
    
        /**
         * Concrete test implementation of AbstractExtractor for testing purposes.
         */
        private static class TestExtractor extends AbstractExtractor {
            private boolean validateCalled = false;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 8.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets text from a Microsoft Visio file.
     *
     * @author shinsuke
     *
     */
    public class MsVisioExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsVisioExtractor instance.
         */
        public MsVisioExtractor() {
            super();
        }
    
        /**
         * Extracts text from the Visio input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from Microsoft PowerPoint documents.
     */
    public class MsPowerPointExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsPowerPointExtractor instance.
         */
        public MsPowerPointExtractor() {
            super();
        }
    
        /**
         * Extracts text from the PowerPoint input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 2K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from an input stream as plain text.
     */
    public class TextExtractor extends AbstractExtractor {
    
        /**
         * The encoding for text.
         */
        protected String encoding = Constants.UTF_8;
    
        /**
         * Creates a new TextExtractor instance.
         */
        public TextExtractor() {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 2K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets text from an .xls file.
     *
     * @author shinsuke
     *
     */
    public class MsExcelExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsExcelExtractor instance.
         */
        public MsExcelExtractor() {
            super();
        }
    
        /**
         * Extracts text from the Excel input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets text from a .doc file.
     *
     * @author shinsuke
     *
     */
    public class MsWordExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsWordExtractor instance.
         */
        public MsWordExtractor() {
            super();
        }
    
        /**
         * Extracts text from the Word input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 1.7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

     *
     * <p>The filename is retrieved from the {@link ExtractData#RESOURCE_NAME_KEY} parameter.
     * If this parameter is not present, an empty string is returned as the content.</p>
     */
    public class FilenameExtractor extends AbstractExtractor {
    
        /**
         * Constructs a new FilenameExtractor.
         */
        public FilenameExtractor() {
            // Default constructor
        }
    
        /**
         * Extracts the filename from the parameters.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 2.7K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/EXTRACTOR_TESTS_README.md

    - ✅ Handles empty archives gracefully
    - ✅ Mixed valid/invalid entries processed correctly
    - ✅ Null input stream validation
    
    ---
    
    ### 4. AbstractExtractorTest.java
    **Purpose**: Test the base AbstractExtractor functionality.
    
    **Key Test Areas**:
    - `validateInputStream()` method behavior
    - Exception types and messages
    - Stream consumption (should not consume)
    - Consistency across multiple calls
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 5.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets text from a Microsoft Publisher file.
     *
     * @author shinsuke
     *
     */
    public class MsPublisherExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsPublisherExtractor instance.
         */
        public MsPublisherExtractor() {
            super();
        }
    
        /**
         * Extracts text from the Publisher input stream.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 2K bytes
    - Viewed (0)
Back to top