- Sort Score
- Result 10 results
- Languages All
Results 51 - 60 of 119 for extractors (0.23 sec)
-
src/main/java/org/codelibs/fess/helper/DocumentHelper.java
import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; import org.codelibs.fess.crawler.processor.ResponseProcessor; import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor; import org.codelibs.fess.crawler.rule.Rule;
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 17.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
import org.apache.logging.log4j.Logger; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 30.7K bytes - Viewed (0) -
fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml
<include path="crawler/client.xml"/> <include path="crawler/rule.xml"/> <include path="crawler/filter.xml"/> <include path="crawler/interval.xml"/> <include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Nov 07 04:44:10 UTC 2024 - 2.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java
/** * A response processor implementation that handles sitemaps. * It parses the response body as a SitemapSet, extracts URLs from the sitemaps, * and adds them as child URLs to be crawled. * * <p> * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text. * It then iterates through the sitemaps in the SitemapSet, extracts the URL * from each sitemap, and creates a new {@link RequestData} object for each URL.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler.xml
<include path="crawler/container.xml"/> <include path="crawler/client.xml"/> <include path="crawler/rule.xml"/> <include path="crawler/filter.xml"/> <include path="crawler/interval.xml"/> <include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Tue Nov 28 13:40:25 UTC 2017 - 1.7K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java
*/ FessConfig getFessConfig(); /** * Gets the logger instance for this transformer. * * @return the logger instance */ Logger getLogger(); /** * Extracts the host name from a URL string. * Removes protocol and path components to return just the hostname. * * @param u the URL string to extract host from
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 13.8K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/ThemeHelper.java
/** * Default constructor for ThemeHelper. */ public ThemeHelper() { // Default constructor } /** * Installs a theme from the given artifact. * Extracts theme files from the JAR and deploys them to appropriate directories. * * @param artifact the theme artifact to install * @throws ThemeException if installation fails */
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 7K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/util/ComponentUtil.java
*/ public static IntervalControlHelper getIntervalControlHelper() { return getComponent(INTERVAL_CONTROL_HELPER); } /** * Gets the extractor factory component. * @return The extractor factory. */ public static ExtractorFactory getExtractorFactory() { return getComponent(EXTRACTOR_FACTORY); } /** * Gets a job executor by name.
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 28.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/PermissionHelper.java
* * @param userPrefix the user prefix to set */ public void setUserPrefix(final String userPrefix) { this.userPrefix = userPrefix; } /** * Extracts role type information from SMB (Server Message Block) response data. * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs. * * @param responseData the response data containing SMB metadata
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformerTest.java
+ "</doc>"; final ResponseData responseData = new ResponseData(); responseData.setResponseBody(ResourceUtil.getResourceAsFile("extractor/test.xml"), false); responseData.setCharSet(Constants.UTF_8); final ResultData resultData = xmlTransformer.transform(responseData); assertEquals(result, new String(resultData.getData(), Constants.UTF_8));
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 13.5K bytes - Viewed (0)