- Sort Score
- Result 10 results
- Languages All
Results 11 - 20 of 104 for extraction (0.05 sec)
-
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
} return new URL(currentUrl); } /** * Gets child URL extraction rules from configuration. * * @param responseData the response data from crawling * @param resultData the result data * @return stream of tag-attribute pairs for URL extraction */ @Override
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 54.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java
} catch (final Exception e) { throw new ExtractException(e); } } /** * Returns the encoding used for text extraction. * @return the encoding */ public String getEncoding() { return encoding; } /** * Sets the encoding. * @param encoding The encoding to set. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java
throw new FessSystemException("Could not find extractorFactory."); } final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType()); if (logger.isDebugEnabled()) { logger.debug("url={}, extractor={}", responseData.getUrl(), extractor); } return extractor; }
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 3.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExtractException.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; /** * Exception thrown during the extraction process in the crawler. * This exception indicates a failure or error that occurred while extracting content from a crawled resource. * It extends {@link org.codelibs.fess.crawler.exception.CrawlerSystemException} and provides constructors
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java
} /** * Gets the appropriate extractor for the given response data. * Selects an extractor based on the MIME type or falls back to the Tika extractor. * * @param responseData the response data containing the document to extract * @return the extractor instance for processing the document * @throws FessSystemException if no suitable extractor can be found */ @Override
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 3.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java
this.propertyMap = propertyMap; } /** * Gets the map of child URL extraction rules. * * @return the child URL rule map */ public Map<String, String> getChildUrlRuleMap() { return childUrlRuleMap; } /** * Sets the map of child URL extraction rules. * * @param childUrlRuleMap the child URL rule map to set */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 28.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.File; import java.io.IOException; import java.util.List; import org.codelibs.fess.crawler.container.CrawlerContainer; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.2K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/DocumentHelper.java
import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; import org.codelibs.fess.crawler.processor.ResponseProcessor; import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor; import org.codelibs.fess.crawler.rule.Rule;
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Aug 07 03:06:29 UTC 2025 - 17.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java
* * @param in the input stream to extract text from * @param params additional parameters * @return the extracted data * @throws ExtractException if extraction fails */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (logger.isDebugEnabled()) { logger.debug("Accessing {}", url); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 12.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import jakarta.mail.Address; import jakarta.mail.BodyPart; import jakarta.mail.Header; import jakarta.mail.Message;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0)