Search Options

Results per page
Sort
Preferred Languages
Advance

Results 31 - 40 of 154 for extracted (1.41 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content and metadata from CSV files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>Automatic delimiter detection (comma, tab, semicolon, pipe)</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

     * </p>
     *
     * <p>
     * The transform method takes a ResponseData object containing the XML content and returns a ResultData object with the extracted and formatted data.
     * </p>
     *
     * <p>
     * The getData method returns the data extracted from AccessResultData. It can return either a String representation of the XML or a Map/Bean representation based on the configured dataClass.
     * </p>
     *
     * <p>
     * Example Usage:
     * </p>
     *
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

            } finally {
                xpathAPI.remove();
            }
        }
    
        /**
         * Extracts strings from a document using the specified XPath expression.
         *
         * @param document the DOM document to extract strings from
         * @param path the XPath expression to evaluate
         * @return an array of strings extracted from the document
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/form/FormScheme.java

                throw new IORuntimeException(e);
            }
        }
    
        /**
         * Extracts the token value from the content using the given pattern.
         * @param tokenPattern The regex pattern.
         * @param content The content to search.
         * @return The extracted token value.
         */
        protected String getTokenValue(final String tokenPattern, final String content) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14.3K bytes
    - Viewed (1)
  5. src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java

            }
            return value;
        }
    
        /**
         * Extracts the filename from a URL, handling various protocols and URL decoding.
         * Processes HTTP, HTTPS, file, SMB, and FTP URLs appropriately.
         *
         * @param url the URL to extract filename from
         * @param encoding the character encoding (currently unused in this method)
         * @return the extracted filename, or empty string if none found
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 14.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

            return getText(inputStream, params, null);
        }
    
        /**
         * Returns an extracted text.
         *
         * @param inputStream An input stream.
         * @param params A map of parameters.
         * @param postFilter A post filter.
         * @return An extracted data.
         */
        protected ExtractData getText(final InputStream inputStream, final Map<String, String> params,
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 30.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

            }
            return null;
        }
    
        /**
         * Extracts URLs from HTML tag attributes using XPath.
         *
         * @param url the base URL for resolving relative URLs
         * @param document the document to extract URLs from
         * @param xpath the XPath expression to select elements
         * @param attr the attribute name to extract URLs from
         * @param encoding the character encoding to use
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Nov 29 07:42:33 UTC 2025
    - 30.5K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/suggest/util/SuggestUtil.java

            return keywords.toArray(String[]::new);
        }
    
        /**
         * Extracts keywords from the given query string based on the specified fields.
         *
         * @param q the query string to parse and extract keywords from
         * @param fields the fields to consider when extracting keywords
         * @return a list of unique keywords extracted from the query string
         */
    Registered: Sat Dec 20 13:04:59 UTC 2025
    - Last Modified: Sun Nov 23 11:21:40 UTC 2025
    - 17.5K bytes
    - Viewed (1)
  9. src/main/java/org/codelibs/fess/ds/DataStoreFactory.java

         * This method searches for 'fess_ds++.xml' configuration files within JAR files
         * in the data store plugin directory and extracts component class names.
         *
         * <p>The method uses secure XML parsing features to prevent XXE attacks and
         * other XML-based vulnerabilities. Component class names are extracted from
         * the 'class' attribute of 'component' elements in the XML files.</p>
         *
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 9K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

        }
    
        /**
         * Get the extracted data.
         * @param extractor The extractor.
         * @param in The input stream.
         * @param params The parameters.
         * @return The extracted data.
         */
        protected ExtractData getExtractData(final Extractor extractor, final InputStream in, final Map<String, String> params) {
            try {
                return extractor.getText(in, params);
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 25.7K bytes
    - Viewed (0)
Back to top