- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 43 for extraction (0.04 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
/** * Default character encoding for content extraction. */ protected String encoding = Constants.UTF_8; /** * The preload size for charset detection. */ protected int preloadSizeForCharset = 2048; /** * Indicates whether comment tags should be ignored during extraction. */ protected boolean ignoreCommentTag = false; /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 8.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java
import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jp.gr.java_conf.dangan.util.lha.LhaFile; import jp.gr.java_conf.dangan.util.lha.LhaHeader; /** * Extractor implementation for LHA (LZH) archive files.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 5.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
*/ package org.codelibs.fess.crawler.extractor; import java.io.InputStream; import java.util.Map; import org.codelibs.fess.crawler.entity.ExtractData; /** * The Extractor interface defines methods for extracting text data from an input stream. * Implementations of this interface should provide the logic for extracting text and * optionally override the default weight value. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java
* * <p>The extractor supports two types of password management: * <ul> * <li>Static passwords configured via {@link #addPassword(String, String)}</li> * <li>Dynamic passwords provided through extraction parameters</li> * </ul> * * <p>Passwords are matched against URLs or resource names using regular expression patterns.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExtractException.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; /** * Exception thrown during the extraction process in the crawler. * This exception indicates a failure or error that occurred while extracting content from a crawled resource. * It extends {@link org.codelibs.fess.crawler.exception.CrawlerSystemException} and provides constructors
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/UnsupportedExtractException.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; /** * UnsupportedExtractException is thrown when the content extraction is not supported. * It extends ExtractException and indicates that the requested extraction operation cannot be performed. * */ public class UnsupportedExtractException extends ExtractException { private static final long serialVersionUID = 1L;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java
} catch (final Exception e) { throw new ExtractException(e); } } /** * Returns the encoding used for text extraction. * @return the encoding */ public String getEncoding() { return encoding; } /** * Sets the encoding. * @param encoding The encoding to set. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.File; import java.io.IOException; import java.util.List; import org.codelibs.fess.crawler.container.CrawlerContainer; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
return htmlTagPattern; } /** * Gets the pattern used for extracting charset from meta tags. * * @return the meta charset pattern */ public Pattern getMetaCharsetPattern() { return metaCharsetPattern; } /** * Sets the pattern used for extracting charset from meta tags. * * @param metaCharsetPattern the meta charset pattern to set
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 9.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
* If no extractor exists for the key, a new array containing the extractor is created and associated with the key. * * @param key The key associated with the extractor. Must not be null or blank. * @param extractor The extractor to add. Must not be null. */ public void addExtractor(final String key, final Extractor extractor) { if (StringUtil.isBlank(key)) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 7.3K bytes - Viewed (0)