Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 31 - 40 of 45 for extras (0.02 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java

     *
     */
    public class MsWordExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsWordExtractor instance.
         */
        public MsWordExtractor() {
            super();
        }
    
        /**
         * Extracts text from the Word input stream.
         * @param in The input stream.
         * @param params The parameters.
         * @return The extracted data.
         */
        @Override
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractor.java

     */
    public class MsExcelExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsExcelExtractor instance.
         */
        public MsExcelExtractor() {
            super();
        }
    
        /**
         * Extracts text from the Excel input stream.
         * @param in The input stream.
         * @param params The parameters.
         * @return The extracted data.
         */
        @Override
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java

    import jakarta.annotation.Resource;
    
    /**
     * TextTransformer is a class that transforms a ResponseData object into a ResultData object containing the extracted text content.
     * It uses an Extractor to extract the text from the response body based on the MIME type.
     * The extracted text is then converted into a byte array using the specified charset encoding.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

     * using the Apache Tika library. It extends {@link PasswordBasedExtractor} to handle password-protected files.
     * </p>
     *
     * <p>
     * This class provides methods to extract text from an input stream, handling different scenarios such as:
     * </p>
     * <ul>
     *   <li>Normalizing text content</li>
     *   <li>Handling resource names and content types</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/HostIntervalController.java

            super(params);
        }
    
        /**
         * Delays before processing a URL, ensuring that requests to the same host are not made too frequently.
         * This method extracts the host from the URL and enforces a delay based on the configured
         * delayMillisBeforeProcessing parameter.
         *
         * @throws InterruptedRuntimeException if the thread is interrupted during the delay
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.2K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/RegexRule.java

     * only one of them (allRequired = false). It also supports a default rule that always matches.
     *
     * <p>
     * The class uses a map of field names to Pattern objects to store the regular expressions.
     * The match method extracts the values of the specified fields from the ResponseData and
     * applies the corresponding regular expressions.
     * </p>
     *
     * <p>
     * Example usage:
     * </p>
     *
     * <pre>
     * {@code
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.2K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

    import jakarta.annotation.Resource;
    
    /**
     * <p>
     * XmlTransformer is a class that extends AbstractTransformer to transform XML documents into a specific format for indexing.
     * It uses XPath expressions to extract data from the XML and stores it in a ResultData object.
     * </p>
     *
     * <p>
     * This class provides several configuration options to customize the XML parsing process, such as:
     * </p>
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  8. fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/service/impl/OpenSearchDataService.java

                } catch (final Exception e) {
                    throw new OpenSearchAccessException("response: " + response, e);
                }
            }
            return targetList;
        }
    
        /**
         * Extracts a field value from OpenSearch results and converts it to the specified type.
         *
         * @param <T> The target type.
         * @param field The field value from OpenSearch.
         * @param clazz The target class.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 10.9K bytes
    - Viewed (0)
  9. fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/service/impl/AbstractCrawlerService.java

                }
            } else if (logger.isDebugEnabled()) {
                logger.debug("{} mapping exists.", index);
            }
        }
    
        /**
         * Extracts a Date object from the source map using the specified field name.
         * Handles various date formats including Date objects, timestamp numbers, and ISO date strings.
         *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 34.2K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import jakarta.annotation.Resource;
    
    /**
     * {@link HtmlXpathExtractor} is an implementation of the {@link org.codelibs.fess.crawler.extractor.Extractor} interface.
     * It uses XPath expressions to extract text content from HTML documents.
     * <p>
     * This class provides methods to configure the XPath expressions, parser features, and properties.
     * It also includes a caching mechanism for XPathAPI instances to improve performance.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
Back to top