Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 317 for extractor (0.05 seconds)

  1. src/main/resources/crawler/extractor+tikaExtractor.xml

    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    	<component name="tikaExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.TikaExtractor">
    		<property name="maxCompressionRatio">2</property>
    		<property name="maxUncompressionSize">10000000</property>
    	</component>
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sun Aug 25 12:46:12 GMT 2019
    - 461 bytes
    - Click Count (0)
  2. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    	<component name="lhaExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.LhaExtractor" />
    	<component name="textExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.TextExtractor" />
    	<component name="htmlExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.HtmlExtractor">
    		<property name="featureMap">
    			<component class="java.util.LinkedHashMap">
    				<postConstruct name="put">
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Wed Feb 11 01:15:55 GMT 2026
    - 50.4K bytes
    - Click Count (0)
  3. src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java

                throw new FessSystemException("Could not find extractorFactory.");
            }
            final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            if (logger.isDebugEnabled()) {
                logger.debug("url={}, extractor={}", responseData.getUrl(), extractor);
            }
            return extractor;
        }
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.5K bytes
    - Click Count (0)
  4. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
         * @throws FessSystemException if no suitable extractor can be found
         */
        @Override
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.8K bytes
    - Click Count (0)
  5. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

        }
    
        /**
         * Get the extracted data.
         * @param extractor The extractor.
         * @param in The input stream.
         * @param params The parameters.
         * @return The extracted data.
         */
        protected ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
            try {
                return extractor.getText(in, params);
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 25.7K bytes
    - Click Count (0)
  6. CLAUDE.md

    - **ResponseProcessor**: `DefaultResponseProcessor`, `SitemapsResponseProcessor`, `NullResponseProcessor`
    - **Transformer**: `HtmlTransformer`, `XmlTransformer`, `FileTransformer`, etc.
    - **Extractor**: Weight-based selection (tries in descending weight order)
    
    ### Key Extractors
    
    `TikaExtractor`, `PdfExtractor`, `MsWordExtractor`, `MsExcelExtractor`, `MsPowerPointExtractor`, `ZipExtractor`, `HtmlExtractor`, `MarkdownExtractor`, `EmlExtractor`
    
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Mar 12 03:39:20 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  7. README.md

    - Audio formats (MP3, WAV, FLAC)
    - Video formats (MP4, AVI, MOV)
    - Metadata extraction from media files
    
    ## Architecture
    
    ### Multi-Module Structure
    
    ```
    fess-crawler-parent/
    ├── fess-crawler/              # Core crawler framework
    │   ├── client/               # Protocol clients (HTTP, FTP, SMB, etc.)
    │   ├── extractor/           # Content extractors
    │   ├── transformer/         # Data transformers
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Sun Aug 31 05:32:52 GMT 2025
    - 15.3K bytes
    - Click Count (0)
  8. src/test/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformerTest.java

    import org.codelibs.fess.Constants;
    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.mylasta.direction.FessConfig;
    import org.codelibs.fess.unit.UnitFessTestCase;
    import org.codelibs.fess.util.ComponentUtil;
    import org.junit.jupiter.api.Test;
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Jan 15 12:54:47 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  9. src/main/java/org/codelibs/fess/util/ComponentUtil.java

         */
        public static IntervalControlHelper getIntervalControlHelper() {
            return getComponent(INTERVAL_CONTROL_HELPER);
        }
    
        /**
         * Gets the extractor factory component.
         * @return The extractor factory.
         */
        public static ExtractorFactory getExtractorFactory() {
            return getComponent(EXTRACTOR_FACTORY);
        }
    
        /**
         * Gets a job executor by name.
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sat Mar 28 06:59:19 GMT 2026
    - 30.9K bytes
    - Click Count (0)
  10. .teamcity/subprojects.json

        "path": "platforms/jvm/jacoco-workers",
        "unitTests": false,
        "functionalTests": false,
        "crossVersionTests": false
      },
      {
        "name": "java-api-extractor",
        "path": "platforms/core-configuration/java-api-extractor",
        "unitTests": true,
        "functionalTests": false,
        "crossVersionTests": false
      },
      {
        "name": "java-compiler-plugin",
    Created: Wed Apr 01 11:36:16 GMT 2026
    - Last Modified: Fri Mar 27 15:03:00 GMT 2026
    - 42K bytes
    - Click Count (0)
Back to Top