Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 53 for extractor (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from TAR archives.
     */
    public class TarExtractor extends AbstractExtractor {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from ZIP archives.
     */
    public class ZipExtractor extends AbstractExtractor {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content and metadata from CSV files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>Automatic delimiter detection (comma, tab, semicolon, pipe)</li>
     *   <li>Header row detection and extraction</li>
     *   <li>Column name to data value association</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.Map;
    
    import org.codelibs.core.io.InputStreamUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from an input stream as plain text.
     */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 2K bytes
    - Viewed (0)
  5. .teamcity/subprojects.json

        "name": "jacoco",
        "path": "platforms/jvm/jacoco",
        "unitTests": true,
        "functionalTests": true,
        "crossVersionTests": false
      },
      {
        "name": "java-api-extractor",
        "path": "platforms/core-configuration/java-api-extractor",
        "unitTests": true,
        "functionalTests": false,
        "crossVersionTests": false
      },
      {
        "name": "java-compiler-plugin",
    Registered: Wed Dec 31 11:36:14 UTC 2025
    - Last Modified: Thu Dec 18 18:40:11 UTC 2025
    - 37.5K bytes
    - Viewed (0)
  6. .idea/gradle.xml

                <option value="$PROJECT_DIR$/platforms/core-configuration/isolated-action-services" />
                <option value="$PROJECT_DIR$/platforms/core-configuration/java-api-extractor" />
                <option value="$PROJECT_DIR$/platforms/core-configuration/kotlin-dsl" />
                <option value="$PROJECT_DIR$/platforms/core-configuration/kotlin-dsl-integ-tests" />
    Registered: Wed Dec 31 11:36:14 UTC 2025
    - Last Modified: Thu Dec 11 18:02:10 UTC 2025
    - 23.2K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java

            }
            return value;
        }
    
        /**
         * Extracts the filename from a URL, handling various protocols and URL decoding.
         * Processes HTTP, HTTPS, file, SMB, and FTP URLs appropriately.
         *
         * @param url the URL to extract filename from
         * @param encoding the character encoding (currently unused in this method)
         * @return the extracted filename, or empty string if none found
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 14.1K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

                return URI.create(url);
            }
            return null;
        }
    
        /**
         * Adds a field extraction rule with pruning option.
         *
         * @param name the field name
         * @param xpath the XPath expression for extraction
         * @param isPruned whether the extracted content should be pruned
         */
        public void addFieldRule(final String name, final String xpath, final boolean isPruned) {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 54.6K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/net/protocol/gcs/Handler.java

             * This constructor parses the URL to extract bucket and object names.
             *
             * @param url The GCS URL to connect to
             */
            protected GcsURLConnection(final URL url) {
                super(url);
                // Extract bucket name from host
                bucketName = url.getHost() != null ? url.getHost() : StringUtil.EMPTY;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 9.6K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/net/protocol/s3/Handler.java

             * This constructor parses the URL to extract bucket and object names.
             *
             * @param url The S3 URL to connect to
             */
            protected S3URLConnection(final URL url) {
                super(url);
                // Extract bucket name from host
                bucketName = url.getHost() != null ? url.getHost() : StringUtil.EMPTY;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 07:57:44 UTC 2025
    - 9.5K bytes
    - Viewed (0)
Back to top