Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 17 for Extraction (0.05 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

    /**
     * Extracts text content and metadata from CSV files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>Automatic delimiter detection (comma, tab, semicolon, pipe)</li>
     *   <li>Header row detection and extraction</li>
     *   <li>Column name to data value association</li>
     *   <li>Quoted field handling</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

            }
            return URI.create(currentUrl);
        }
    
        /**
         * Gets child URL extraction rules from configuration.
         *
         * @param responseData the response data from crawling
         * @param resultData the result data
         * @return stream of tag-attribute pairs for URL extraction
         */
        @Override
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 54.6K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

            } catch (final Exception e) {
                throw new ExtractException("Failed to extract text content using encoding: " + getEncoding(), e);
            }
        }
    
        /**
         * Returns the encoding used for text extraction.
         * @return the encoding
         */
        public String getEncoding() {
            return encoding;
        }
    
        /**
         * Sets the encoding.
         * @param encoding The encoding to set.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 2K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java

    import jakarta.annotation.PostConstruct;
    
    /**
     * File transformer implementation for the Fess search engine.
     * This transformer handles file-based document transformation and content extraction
     * using the Fess file transformation process with support for various file types.
     *
     * <p>It extends AbstractFessFileTransformer to provide specialized file processing
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.5K bytes
    - Viewed (0)
  5. CLAUDE.md

    **Fess Crawler** is a Java-based web crawling framework for enterprise content extraction.
    
    ### Essential Info
    
    - **Language**: Java 21+
    - **Build**: Maven 3.x
    - **License**: Apache 2.0
    - **DI**: LastaFlute DI
    - **Repo**: https://github.com/codelibs/fess-crawler
    
    ### Tech Stack
    
    - **HTTP**: Apache HttpComponents 4.5+
    - **Extraction**: Apache Tika 3.0+, POI 5.3+, PDFBox 3.0+
    - **Testing**: JUnit 4, UTFlute, Mockito 5.7.0
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

            this.propertyMap = propertyMap;
        }
    
        /**
         * Gets the map of child URL extraction rules.
         *
         * @return the child URL rule map
         */
        public Map<String, String> getChildUrlRuleMap() {
            return childUrlRuleMap;
        }
    
        /**
         * Sets the map of child URL extraction rules.
         *
         * @param childUrlRuleMap the child URL rule map to set
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Nov 29 07:42:33 UTC 2025
    - 30.5K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/helper/ThemeHelper.java

    import org.codelibs.fess.helper.PluginHelper.ArtifactType;
    import org.codelibs.fess.util.ResourceUtil;
    
    /**
     * Helper class for managing theme installation and uninstallation.
     * Handles the extraction and deployment of theme files from JAR artifacts.
     */
    public class ThemeHelper {
        private static final Logger logger = LogManager.getLogger(ThemeHelper.class);
    
        /**
         * Default constructor for ThemeHelper.
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 7.1K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/helper/DocumentHelper.java

    /**
     * Helper class for document processing and manipulation in the Fess search system.
     * This class provides utilities for processing document content, titles, and digests,
     * handling text normalization, content extraction, and similar document hash encoding/decoding.
     * It also manages document processing requests and integrates with the crawler system.
     *
     */
    public class DocumentHelper {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 17.4K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

    import org.codelibs.fess.util.ComponentUtil;
    
    import jakarta.annotation.PostConstruct;
    
    /**
     * Standard transformer implementation for the Fess search engine.
     * This transformer handles document transformation and content extraction using
     * the standard Fess file transformation process with support for various content types.
     *
     * <p>It extends AbstractFessFileTransformer to provide file-specific transformation
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

            }
            return false;
        }
    
        /**
         * Create the parameters for extraction.
         * @param responseData The response data.
         * @param crawlingConfig The crawling configuration.
         * @return The parameters for extraction.
         */
        protected Map<String, String> createExtractParams(final ResponseData responseData, final CrawlingConfig crawlingConfig) {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 25.7K bytes
    - Viewed (0)
Back to top