Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 7 of 7 for AbstractExtractor (0.06 sec)

  1. CLAUDE.md

    **Strategy**: `CrawlerClient`, `Extractor`, `Transformer` - pluggable implementations
    **Builder**: `RequestDataBuilder`, `ExtractorBuilder` - fluent construction
    **Template Method**: `AbstractCrawlerClient`, `AbstractExtractor` - common logic with overrides
    **DI**: LastaFlute container with `@Resource` and XML config
    
    ### Core Principles
    
    **Thread Safety**:
    - `AtomicLong` for counters (`CrawlerContext.accessCount`)
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

    import com.google.common.base.Charsets;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extracts text by using an external HTTP server.
     */
    public class ApiExtractor extends AbstractExtractor {
    
        private static final Logger logger = LogManager.getLogger(ApiExtractor.class);
    
        /** The URL of the API endpoint. */
        protected String url;
    
        /** The access timeout in seconds. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

     *   <li>Quoted field handling</li>
     *   <li>Column names as metadata</li>
     *   <li>Configurable encoding and row limits</li>
     * </ul>
     */
    public class CsvExtractor extends AbstractExtractor {
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(CsvExtractor.class);
    
        /** Default encoding for CSV files. */
        protected String encoding = Constants.UTF_8;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java

    import jakarta.mail.internet.MimeMessage;
    import jakarta.mail.internet.MimeUtility;
    
    /**
     * Gets text from an .eml file.
     *
     * @author shinsuke
     *
     */
    public class EmlExtractor extends AbstractExtractor {
        /** Array of day of week abbreviations used for parsing received dates */
        private static final String[] DAY_OF_WEEK = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
    
        /** Logger instance for this class */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.6K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extracts text content from various document formats using JODConverter.
     */
    public class JodExtractor extends AbstractExtractor {
        /** Logger for this class. */
        private static final Logger logger = LogManager.getLogger(JodExtractor.class);
    
        /** Office manager for document conversion. */
        protected OfficeManager officeManager;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content by executing an external command.
     */
    public class CommandExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(CommandExtractor.class);
    
        /** The encoding for the output. */
        protected String outputEncoding = Constants.UTF_8;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 16.1K bytes
    - Viewed (0)
  7. README.md

    }
    
    // Wait for completion
    crawler.awaitTermination();
    System.out.println("Crawling completed");
    ```
    
    ### Custom Content Extractor
    
    ```java
    public class CustomExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(final InputStream inputStream, final Map<String, String> params) {
            // Custom extraction logic
            ExtractData extractData = new ExtractData();
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
Back to top