- Sort Score
- Result 10 results
- Languages All
Results 11 - 18 of 18 for AbstractExtractor (0.06 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java
import com.google.common.base.Charsets; import jakarta.annotation.PostConstruct; import jakarta.annotation.PreDestroy; /** * Extract a text by using external http server. */ public class ApiExtractor extends AbstractExtractor { private static final Logger logger = LogManager.getLogger(ApiExtractor.class); /** The URL of the API endpoint. */ protected String url; /** The access timeout in seconds. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 12.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java
import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jakarta.annotation.Resource; /** * Extracts text content from TAR archives. */ public class TarExtractor extends AbstractExtractor { private static final Logger logger = LogManager.getLogger(TarExtractor.class); /** * The archive stream factory. */ @Resource protected ArchiveStreamFactory archiveStreamFactory;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java
* The extractor first tries to match against the URL, then falls back to the resource name if available. * * @author shinsuke */ public abstract class PasswordBasedExtractor extends AbstractExtractor { /** Logger instance for this class. */ private static final Logger logger = LogManager.getLogger(PasswordBasedExtractor.class); /** Map of regex patterns to passwords for static password configuration. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java
import jakarta.mail.internet.MimeMessage; import jakarta.mail.internet.MimeUtility; /** * Gets a text from .eml file. * * @author shinsuke * */ public class EmlExtractor extends AbstractExtractor { /** Array of day of week abbreviations used for parsing received dates */ private static final String[] DAY_OF_WEEK = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" }; /** Logger instance for this class */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
* It handles encoding detection, HTML entity unescaping, and tag-based content extraction. * */ public abstract class AbstractXmlExtractor extends AbstractExtractor { /** * Logger for this class. */ protected static final Logger logger = LogManager.getLogger(AbstractXmlExtractor.class); /** * UTF-7 Byte Order Mark definition. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 8.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java
import jakarta.annotation.PostConstruct; import jakarta.annotation.PreDestroy; /** * Extracts text content from various document formats using JODConverter. */ public class JodExtractor extends AbstractExtractor { /** Logger for this class. */ private static final Logger logger = LogManager.getLogger(JodExtractor.class); /** Office manager for document conversion. */ protected OfficeManager officeManager;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content by executing an external command. */ public class CommandExtractor extends AbstractExtractor { private static final Logger logger = LogManager.getLogger(CommandExtractor.class); /** The encoding for the output. */ protected String outputEncoding = Constants.UTF_8;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 16K bytes - Viewed (0) -
README.md
} // Wait for completion crawler.awaitTermination(); System.out.println("Crawling completed"); ``` ### Custom Content Extractor ```java public class CustomExtractor extends AbstractExtractor { @Override public ExtractData getText(final InputStream inputStream, final Map<String, String> params) { // Custom extraction logic ExtractData extractData = new ExtractData();
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0)