Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 11 - 20 of 24 for AbstractExtractor (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java

     * This extractor can extract text content from files within LHA archives
     * by using appropriate extractors for each contained file type.
     *
     * @author shinsuke
     */
    public class LhaExtractor extends AbstractExtractor {
        /** Logger for this class. */
        private static final Logger logger = LogManager.getLogger(LhaExtractor.class);
    
        /** Maximum content size for extraction. -1 means no limit. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 5.9K bytes
    - Viewed (0)
  2. CLAUDE.md

    **Strategy**: `CrawlerClient`, `Extractor`, `Transformer` - pluggable implementations
    **Builder**: `RequestDataBuilder`, `ExtractorBuilder` - fluent construction
    **Template Method**: `AbstractCrawlerClient`, `AbstractExtractor` - common logic with overrides
    **DI**: LastaFlute container with `@Resource` and XML config
    
    ### Core Principles
    
    **Thread Safety**:
    - `AtomicLong` for counters (`CrawlerContext.accessCount`)
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from ZIP archives.
     */
    public class ZipExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(ZipExtractor.class);
    
        /**
         * The archive stream factory.
         */
        @Resource
        protected ArchiveStreamFactory archiveStreamFactory;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from TAR archives.
     */
    public class TarExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(TarExtractor.class);
    
        /**
         * The archive stream factory.
         */
        @Resource
        protected ArchiveStreamFactory archiveStreamFactory;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

     *   <li>Link URL extraction</li>
     *   <li>Code block content extraction</li>
     *   <li>Clean text conversion from Markdown</li>
     *   <li>Configurable encoding</li>
     * </ul>
     */
    public class MarkdownExtractor extends AbstractExtractor {
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(MarkdownExtractor.class);
    
        /** Default encoding for Markdown files. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java

     * The extractor first tries to match against the URL, then falls back to the resource name if available.
     *
     * @author shinsuke
     */
    public abstract class PasswordBasedExtractor extends AbstractExtractor {
    
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(PasswordBasedExtractor.class);
    
        /** Map of regex patterns to passwords for static password configuration. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

     * It handles encoding detection, HTML entity unescaping, and tag-based content extraction.
     *
     */
    public abstract class AbstractXmlExtractor extends AbstractExtractor {
    
        /**
         * Logger for this class.
         */
        protected static final Logger logger = LogManager.getLogger(AbstractXmlExtractor.class);
    
        /**
         * UTF-7 Byte Order Mark definition.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 8.6K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

    import com.google.common.base.Charsets;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extract a text by using external http server.
     */
    public class ApiExtractor extends AbstractExtractor {
    
        private static final Logger logger = LogManager.getLogger(ApiExtractor.class);
    
        /** The URL of the API endpoint. */
        protected String url;
    
        /** The access timeout in seconds. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java

     *   <li>Nested structure flattening with configurable depth</li>
     *   <li>Array element extraction</li>
     *   <li>Configurable field separator and array formatting</li>
     * </ul>
     */
    public class JsonExtractor extends AbstractExtractor {
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(JsonExtractor.class);
    
        /** Jackson ObjectMapper for JSON parsing. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 9.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

     *   <li>Quoted field handling</li>
     *   <li>Column names as metadata</li>
     *   <li>Configurable encoding and row limits</li>
     * </ul>
     */
    public class CsvExtractor extends AbstractExtractor {
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(CsvExtractor.class);
    
        /** Default encoding for CSV files. */
        protected String encoding = Constants.UTF_8;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
Back to top