Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 12 for ExtractorFactory (0.07 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java

     * supports dependency injection via the {@link Resource} annotation.
     * </p>
     */
    public class ExtractorFactory {
    
        /** Logger instance for this class */
        private static final Logger logger = LogManager.getLogger(ExtractorFactory.class);
    
        /** Container for managing crawler components */
        @Resource
        protected CrawlerContainer crawlerContainer;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 7.4K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java

        @Override
        protected Extractor getExtractor(final ResponseData responseData) {
            final ExtractorFactory extractorFactory = ComponentUtil.getExtractorFactory();
            if (extractorFactory == null) {
                throw new FessSystemException("Could not find extractorFactory.");
            }
            final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            if (logger.isDebugEnabled()) {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.5K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java

                throw new CrawlerSystemException("LHA archive input stream is null. Cannot extract text from null input.");
            }
    
            final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
            final ExtractorFactory extractorFactory = getExtractorFactory();
            final StringBuilder buf = new StringBuilder(1000);
    
            File tempFile = null;
            LhaFile lhaFile = null;
            try {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 5.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractor.java

        }
    
        /**
         * Returns the ExtractorFactory instance from the CrawlerContainer.
         * @return The ExtractorFactory instance.
         */
        protected ExtractorFactory getExtractorFactory() {
            final ExtractorFactory extractorFactory = crawlerContainer.getComponent("extractorFactory");
            if (extractorFactory == null) {
                throw new CrawlerSystemException("ExtractorFactory is unavailable.");
            }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 4.6K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

            final ExtractorFactory extractorFactory = getExtractorFactory();
            return new ExtractData(getTextInternal(in, mimeTypeHelper, extractorFactory));
        }
    
        /**
         * Returns a text from the input stream.
         *
         * @param in The input stream.
         * @param mimeTypeHelper The mime type helper.
         * @param extractorFactory The extractor factory.
         * @return A text.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

            final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
            final ExtractorFactory extractorFactory = getExtractorFactory();
            final String mimeType = mimeTypeHelper.getContentType(null, filename);
            if (mimeType != null) {
                final Extractor extractor = extractorFactory.getExtractor(mimeType);
                if (extractor != null) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        @Override
        protected Extractor getExtractor(final ResponseData responseData) {
            final ExtractorFactory extractorFactory = ComponentUtil.getExtractorFactory();
            if (extractorFactory == null) {
                throw new FessSystemException("Could not find extractorFactory.");
            }
            Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            if (extractor == null) {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                        factory.addClient("file:.*", container.getComponent("fsClient"));
                    })
                    .singleton("tikaExtractor", TikaExtractor.class)
                    .<ExtractorFactory> singleton("extractorFactory", ExtractorFactory.class, factory -> {
                        TikaExtractor tikaExtractor = container.getComponent("tikaExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Tue Nov 11 13:40:14 UTC 2025
    - 25.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            validateInputStream(in);
    
            final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
            final ExtractorFactory extractorFactory = getExtractorFactory();
            final StringBuilder buf = new StringBuilder(1000);
            int processedEntries = 0;
            int failedEntries = 0;
    
            try (final ArchiveInputStream ais =
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  10. CLAUDE.md

    `TikaExtractor` (1000+ formats), `PdfExtractor`, `MsWordExtractor`, `MsExcelExtractor`, `MsPowerPointExtractor`, `ZipExtractor`, `HtmlExtractor`, etc.
    
    **Registration**:
    ```java
    extractorFactory.addExtractor("text/html", htmlExtractor, 2);  // Weight 2
    extractorFactory.addExtractor("text/html", tikaExtractor, 1);  // Fallback
    ```
    
    ### Helpers
    
    **RobotsTxtHelper**: RFC 9309 parsing, user-agent matching, crawl-delay, sitemaps
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
Back to top