Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 130 for extractor (0.05 seconds)

  1. src/main/resources/crawler/extractor+tikaExtractor.xml

    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    	<component name="tikaExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.TikaExtractor">
    		<property name="maxCompressionRatio">2</property>
    		<property name="maxUncompressionSize">10000000</property>
    	</component>
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sun Aug 25 12:46:12 GMT 2019
    - 461 bytes
    - Click Count (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java

                throw new FessSystemException("Could not find extractorFactory.");
            }
            final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            if (logger.isDebugEnabled()) {
                logger.debug("url={}, extractor={}", responseData.getUrl(), extractor);
            }
            return extractor;
        }
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.5K bytes
    - Click Count (0)
  3. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
         * @throws FessSystemException if no suitable extractor can be found
         */
        @Override
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.8K bytes
    - Click Count (0)
  4. CLAUDE.md

    - **ResponseProcessor**: `DefaultResponseProcessor`, `SitemapsResponseProcessor`, `NullResponseProcessor`
    - **Transformer**: `HtmlTransformer`, `XmlTransformer`, `FileTransformer`, etc.
    - **Extractor**: Weight-based selection (tries in descending weight order)
    
    ### Key Extractors
    
    `TikaExtractor`, `PdfExtractor`, `MsWordExtractor`, `MsExcelExtractor`, `MsPowerPointExtractor`, `ZipExtractor`, `HtmlExtractor`, `MarkdownExtractor`, `EmlExtractor`
    
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Mar 12 03:39:20 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  5. src/test/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformerTest.java

    import org.codelibs.fess.Constants;
    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.mylasta.direction.FessConfig;
    import org.codelibs.fess.unit.UnitFessTestCase;
    import org.codelibs.fess.util.ComponentUtil;
    import org.junit.jupiter.api.Test;
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Jan 15 12:54:47 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  6. fess-crawler-lasta/src/main/resources/crawler.xml

    	<include path="crawler/container.xml"/>
    	<include path="crawler/client.xml"/>
    	<include path="crawler/rule.xml"/>
    	<include path="crawler/filter.xml"/>
    	<include path="crawler/interval.xml"/>
    	<include path="crawler/extractor.xml"/>
    	<include path="crawler/mimetype.xml"/>
    	<include path="crawler/encoding.xml"/>
    	<include path="crawler/urlconverter.xml"/>
    	<include path="crawler/log.xml"/>
    	<include path="crawler/sitemaps.xml"/>
    
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Tue Nov 28 13:40:25 GMT 2017
    - 1.7K bytes
    - Click Count (0)
  7. fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml

        <include path="crawler/client.xml"/>
        <include path="crawler/rule.xml"/>
        <include path="crawler/filter.xml"/>
        <include path="crawler/interval.xml"/>
        <include path="crawler/extractor.xml"/>
        <include path="crawler/mimetype.xml"/>
        <include path="crawler/encoding.xml"/>
        <include path="crawler/urlconverter.xml"/>
        <include path="crawler/log.xml"/>
        <include path="crawler/sitemaps.xml"/>
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Nov 07 04:44:10 GMT 2024
    - 2.2K bytes
    - Click Count (0)
  8. internal/s3select/jstream/README.md

    #
    
    [![GoDoc](https://godoc.org/github.com/bcicen/jstream?status.svg)](https://godoc.org/github.com/bcicen/jstream)
    
    
    `jstream` is a streaming JSON parser and value extraction library for Go.
    
    Unlike most JSON parsers, `jstream` is document position- and depth-aware -- this enables the extraction of values at a specified depth, eliminating the overhead of allocating encompassing arrays or objects; e.g:
    
    Using the below example document:
    Created: Sun Apr 05 19:28:12 GMT 2026
    - Last Modified: Mon Sep 23 19:35:41 GMT 2024
    - 3.2K bytes
    - Click Count (0)
  9. src/main/java/org/codelibs/fess/suggest/index/contents/ContentsParser.java

                ReadingConverter readingConverter, Normalizer normalizer);
    
        /**
         * Parses a document and extracts suggest items based on the provided fields and converters.
         *
         * @param document The document to parse, represented as a map of field names to values.
         * @param fields The fields to extract from the document.
         * @param tagFieldNames The names of the fields that contain tags.
    Created: Fri Apr 17 09:08:13 GMT 2026
    - Last Modified: Sat Mar 15 06:51:20 GMT 2025
    - 4.1K bytes
    - Click Count (0)
  10. internal/crypto/sse-c.go

    	return metadata
    }
    
    // ParseMetadata extracts all SSE-C related values from the object metadata
    // and checks whether they are well-formed. It returns the sealed object key
    // on success.
    func (ssec) ParseMetadata(metadata map[string]string) (sealedKey SealedKey, err error) {
    	// Extract all required values from object metadata
    	b64IV, ok := metadata[MetaIV]
    	if !ok {
    Created: Sun Apr 05 19:28:12 GMT 2026
    - Last Modified: Sun Sep 28 20:59:21 GMT 2025
    - 5.2K bytes
    - Click Count (0)
Back to Top