Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 11 - 18 of 18 for AbstractExtractor (0.07 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

    import com.google.common.base.Charsets;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extract a text by using external http server.
     */
    public class ApiExtractor extends AbstractExtractor {
    
        private static final Logger logger = LogManager.getLogger(ApiExtractor.class);
    
        /** The URL of the API endpoint. */
        protected String url;
    
        /** The access timeout in seconds. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from TAR archives.
     */
    public class TarExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(TarExtractor.class);
    
        /**
         * The archive stream factory.
         */
        @Resource
        protected ArchiveStreamFactory archiveStreamFactory;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 5K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java

     * The extractor first tries to match against the URL, then falls back to the resource name if available.
     *
     * @author shinsuke
     */
    public abstract class PasswordBasedExtractor extends AbstractExtractor {
    
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(PasswordBasedExtractor.class);
    
        /** Map of regex patterns to passwords for static password configuration. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java

    import jakarta.mail.internet.MimeMessage;
    import jakarta.mail.internet.MimeUtility;
    
    /**
     * Gets a text from .eml file.
     *
     * @author shinsuke
     *
     */
    public class EmlExtractor extends AbstractExtractor {
        /** Array of day of week abbreviations used for parsing received dates */
        private static final String[] DAY_OF_WEEK = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
    
        /** Logger instance for this class */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.6K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

     * It handles encoding detection, HTML entity unescaping, and tag-based content extraction.
     *
     */
    public abstract class AbstractXmlExtractor extends AbstractExtractor {
    
        /**
         * Logger for this class.
         */
        protected static final Logger logger = LogManager.getLogger(AbstractXmlExtractor.class);
    
        /**
         * UTF-7 Byte Order Mark definition.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extracts text content from various document formats using JODConverter.
     */
    public class JodExtractor extends AbstractExtractor {
        /** Logger for this class. */
        private static final Logger logger = LogManager.getLogger(JodExtractor.class);
    
        /** Office manager for document conversion. */
        protected OfficeManager officeManager;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content by executing an external command.
     */
    public class CommandExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(CommandExtractor.class);
    
        /** The encoding for the output. */
        protected String outputEncoding = Constants.UTF_8;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 16K bytes
    - Viewed (0)
  8. README.md

    }
    
    // Wait for completion
    crawler.awaitTermination();
    System.out.println("Crawling completed");
    ```
    
    ### Custom Content Extractor
    
    ```java
    public class CustomExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(final InputStream inputStream, final Map<String, String> params) {
            // Custom extraction logic
            ExtractData extractData = new ExtractData();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
Back to top