Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 12 for Extractor (0.13 seconds)

  1. src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java

                throw new FessSystemException("Could not find extractorFactory.");
            }
            final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            if (logger.isDebugEnabled()) {
                logger.debug("url={}, extractor={}", responseData.getUrl(), extractor);
            }
            return extractor;
        }
    Created: Sat Dec 20 09:19:18 GMT 2025
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.5K bytes
    - Click Count (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

        /**
         * Get the extracted data.
         * @param extractor The extractor.
         * @param in The input stream.
         * @param params The parameters.
         * @return The extracted data.
         */
        protected ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
            try {
                return extractor.getText(in, params);
            } catch (final RuntimeException e) {
    Created: Sat Dec 20 09:19:18 GMT 2025
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 25.7K bytes
    - Click Count (0)
  3. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
         * @throws FessSystemException if no suitable extractor can be found
         */
        @Override
    Created: Sat Dec 20 09:19:18 GMT 2025
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.8K bytes
    - Click Count (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

                        final Extractor extractor = extractorFactory.getExtractor(mimeType);
                        if (extractor != null) {
                            try {
                                final Map<String, String> map = new HashMap<>();
                                map.put(ExtractData.RESOURCE_NAME_KEY, filename);
                                buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Thu Dec 11 08:38:29 GMT 2025
    - 5.1K bytes
    - Click Count (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

                        final Extractor extractor = extractorFactory.getExtractor(mimeType);
                        if (extractor != null) {
                            try {
                                final Map<String, String> map = new HashMap<>();
                                map.put(ExtractData.RESOURCE_NAME_KEY, filename);
                                buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Thu Dec 11 08:38:29 GMT 2025
    - 4.8K bytes
    - Click Count (0)
  6. src/main/java/org/codelibs/fess/helper/DocumentHelper.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.impl.TikaExtractor;
    import org.codelibs.fess.crawler.processor.ResponseProcessor;
    import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
    import org.codelibs.fess.crawler.rule.Rule;
    Created: Sat Dec 20 09:19:18 GMT 2025
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 17.4K bytes
    - Click Count (0)
  7. CLAUDE.md

    ```
    
    3. **Add to factory**:
    ```java
    clientFactory.addClient("^myprotocol://.*", myClient);
    ```
    
    4. **Add tests**: Unit + integration
    
    ### Adding a Content Extractor
    
    1. **Implement `Extractor`**:
    ```java
    public class MyExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(InputStream in, Map<String, String> params) {
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Fri Nov 28 17:31:34 GMT 2025
    - 10.7K bytes
    - Click Count (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.Map;
    
    import org.codelibs.core.io.InputStreamUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Thu Dec 11 08:38:29 GMT 2025
    - 2K bytes
    - Click Count (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.Charset;
    import java.util.ArrayList;
    import java.util.List;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Thu Dec 11 08:38:29 GMT 2025
    - 12.8K bytes
    - Click Count (0)
  10. .teamcity/subprojects.json

        "name": "jacoco",
        "path": "platforms/jvm/jacoco",
        "unitTests": true,
        "functionalTests": true,
        "crossVersionTests": false
      },
      {
        "name": "java-api-extractor",
        "path": "platforms/core-configuration/java-api-extractor",
        "unitTests": true,
        "functionalTests": false,
        "crossVersionTests": false
      },
      {
        "name": "java-compiler-plugin",
    Created: Wed Dec 31 11:36:14 GMT 2025
    - Last Modified: Thu Dec 18 18:40:11 GMT 2025
    - 37.5K bytes
    - Click Count (0)
Back to Top