Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 5 of 5 for AbstractExtractor (0.16 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

    import com.google.common.base.Charsets;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extracts text by using an external HTTP server.
     */
    public class ApiExtractor extends AbstractExtractor {
    
        private static final Logger logger = LogManager.getLogger(ApiExtractor.class);
    
        /** The URL of the API endpoint. */
        protected String url;
    
        /** The access timeout in seconds. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java

    import jakarta.mail.internet.MimeMessage;
    import jakarta.mail.internet.MimeUtility;
    
    /**
     * Gets text from an .eml file.
     *
     * @author shinsuke
     *
     */
    public class EmlExtractor extends AbstractExtractor {
        /** Array of day of week abbreviations used for parsing received dates */
        private static final String[] DAY_OF_WEEK = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
    
        /** Logger instance for this class */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.6K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extracts text content from various document formats using JODConverter.
     */
    public class JodExtractor extends AbstractExtractor {
        /** Logger for this class. */
        private static final Logger logger = LogManager.getLogger(JodExtractor.class);
    
        /** Office manager for document conversion. */
        protected OfficeManager officeManager;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content by executing an external command.
     */
    public class CommandExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(CommandExtractor.class);
    
        /** The encoding for the output. */
        protected String outputEncoding = Constants.UTF_8;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 16K bytes
    - Viewed (0)
  5. README.md

    }
    
    // Wait for completion
    crawler.awaitTermination();
    System.out.println("Crawling completed");
    ```
    
    ### Custom Content Extractor
    
    ```java
    public class CustomExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(final InputStream inputStream, final Map<String, String> params) {
            // Custom extraction logic
            ExtractData extractData = new ExtractData();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
Back to top