- Sort Score
- Result 10 results
- Languages All
Results 21 - 30 of 31 for ExtractException (1.41 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.exception.MaxLengthExceededException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 4.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
import org.codelibs.fess.crawler.container.CrawlerContainer; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.exception.UnsupportedExtractException; import jakarta.annotation.Resource; /** * Factory class for managing and retrieving {@link Extractor} instances.Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 7.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.exception.MaxLengthExceededException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
import org.codelibs.core.io.InputStreamUtil; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; import org.commonmark.ext.front.matter.YamlFrontMatterExtension; import org.commonmark.ext.front.matter.YamlFrontMatterVisitor; import org.commonmark.node.AbstractVisitor; import org.commonmark.node.Heading;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
import org.apache.logging.log4j.Logger; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content and metadata from CSV files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java
import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.util.XPathAPI; import org.codelibs.nekohtml.parsers.DOMParser; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.InputSource;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Oct 04 08:47:19 UTC 2025 - 10.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.util.TextUtil; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import jakarta.annotation.PostConstruct;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 30.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExecutionTimeoutException; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content by executing an external command. */ public class CommandExtractor extends AbstractExtractor { private static final Logger logger = LogManager.getLogger(CommandExtractor.class);Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 16.1K bytes - Viewed (0) -
CLAUDE.md
### Exception Hierarchy ``` CrawlerSystemException (unchecked) # Setup/config errors CrawlingAccessException (checked) # Runtime crawl errors ├─ MaxLengthExceededException └─ ChildUrlsException ExtractException (checked) # Extraction failures └─ UnsupportedExtractException ``` ### Thread-Local Storage ```java // Set (in CrawlerThread) CrawlingParameterUtil.setCrawlerContext(context);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 28 17:31:34 UTC 2025 - 10.7K bytes - Viewed (0)