Search Options

Results per page
Sort
Preferred Languages
Advance

Results 51 - 60 of 119 for extractors (0.05 sec)

  1. src/main/java/org/codelibs/fess/helper/DocumentHelper.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.impl.TikaExtractor;
    import org.codelibs.fess.crawler.processor.ResponseProcessor;
    import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
    import org.codelibs.fess.crawler.rule.Rule;
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 17.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

    import org.apache.logging.log4j.Logger;
    import org.apache.tika.config.TikaConfig;
    import org.apache.tika.detect.Detector;
    import org.apache.tika.exception.TikaException;
    import org.apache.tika.extractor.EmbeddedDocumentExtractor;
    import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
    import org.apache.tika.io.TemporaryResources;
    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
  3. fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml

        <include path="crawler/client.xml"/>
        <include path="crawler/rule.xml"/>
        <include path="crawler/filter.xml"/>
        <include path="crawler/interval.xml"/>
        <include path="crawler/extractor.xml"/>
        <include path="crawler/mimetype.xml"/>
        <include path="crawler/encoding.xml"/>
        <include path="crawler/urlconverter.xml"/>
        <include path="crawler/log.xml"/>
        <include path="crawler/sitemaps.xml"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Nov 07 04:44:10 UTC 2024
    - 2.2K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

    /**
     * A response processor implementation that handles sitemaps.
     * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
     * and adds them as child URLs to be crawled.
     *
     * <p>
     * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
     * It then iterates through the sitemaps in the SitemapSet, extracts the URL
     * from each sitemap, and creates a new {@link RequestData} object for each URL.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  5. fess-crawler-lasta/src/main/resources/crawler.xml

    	<include path="crawler/container.xml"/>
    	<include path="crawler/client.xml"/>
    	<include path="crawler/rule.xml"/>
    	<include path="crawler/filter.xml"/>
    	<include path="crawler/interval.xml"/>
    	<include path="crawler/extractor.xml"/>
    	<include path="crawler/mimetype.xml"/>
    	<include path="crawler/encoding.xml"/>
    	<include path="crawler/urlconverter.xml"/>
    	<include path="crawler/log.xml"/>
    	<include path="crawler/sitemaps.xml"/>
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Tue Nov 28 13:40:25 UTC 2017
    - 1.7K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java

         */
        FessConfig getFessConfig();
    
        /**
         * Gets the logger instance for this transformer.
         *
         * @return the logger instance
         */
        Logger getLogger();
    
        /**
         * Extracts the host name from a URL string.
         * Removes protocol and path components to return just the hostname.
         *
         * @param u the URL string to extract host from
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 13.8K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/helper/ThemeHelper.java

        /**
         * Default constructor for ThemeHelper.
         */
        public ThemeHelper() {
            // Default constructor
        }
    
        /**
         * Installs a theme from the given artifact.
         * Extracts theme files from the JAR and deploys them to appropriate directories.
         *
         * @param artifact the theme artifact to install
         * @throws ThemeException if installation fails
         */
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 7K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/util/ComponentUtil.java

         */
        public static IntervalControlHelper getIntervalControlHelper() {
            return getComponent(INTERVAL_CONTROL_HELPER);
        }
    
        /**
         * Gets the extractor factory component.
         * @return The extractor factory.
         */
        public static ExtractorFactory getExtractorFactory() {
            return getComponent(EXTRACTOR_FACTORY);
        }
    
        /**
         * Gets a job executor by name.
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 28.9K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/helper/PermissionHelper.java

         *
         * @param userPrefix the user prefix to set
         */
        public void setUserPrefix(final String userPrefix) {
            this.userPrefix = userPrefix;
        }
    
        /**
         * Extracts role type information from SMB (Server Message Block) response data.
         * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs.
         *
         * @param responseData the response data containing SMB metadata
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformerTest.java

                    + "</doc>";
    
            final ResponseData responseData = new ResponseData();
            responseData.setResponseBody(ResourceUtil.getResourceAsFile("extractor/test.xml"), false);
            responseData.setCharSet(Constants.UTF_8);
            final ResultData resultData = xmlTransformer.transform(responseData);
            assertEquals(result, new String(resultData.getData(), Constants.UTF_8));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 13.5K bytes
    - Viewed (0)
Back to top