Search Options

Results per page
Sort
Preferred Languages
Advance

Results 21 - 30 of 31 for ExtractException (1.41 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java

    import org.codelibs.fess.crawler.container.CrawlerContainer;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.exception.UnsupportedExtractException;
    
    import jakarta.annotation.Resource;
    
    /**
     * Factory class for managing and retrieving {@link Extractor} instances.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 7.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

    import org.codelibs.core.io.InputStreamUtil;
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.commonmark.ext.front.matter.YamlFrontMatterExtension;
    import org.commonmark.ext.front.matter.YamlFrontMatterVisitor;
    import org.commonmark.node.AbstractVisitor;
    import org.commonmark.node.Heading;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content and metadata from CSV files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java

    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.node.ArrayNode;
    import com.fasterxml.jackson.databind.node.ObjectNode;
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 9.7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.util.TextUtil;
    import org.xml.sax.ContentHandler;
    import org.xml.sax.SAXException;
    
    import jakarta.annotation.PostConstruct;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 30.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExecutionTimeoutException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content by executing an external command.
     */
    public class CommandExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(CommandExtractor.class);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 16.1K bytes
    - Viewed (0)
  10. CLAUDE.md

    ### Exception Hierarchy
    
    ```
    CrawlerSystemException (unchecked)  # Setup/config errors
    CrawlingAccessException (checked)   # Runtime crawl errors
      ├─ MaxLengthExceededException
      └─ ChildUrlsException
    ExtractException (checked)          # Extraction failures
      └─ UnsupportedExtractException
    ```
    
    ### Thread-Local Storage
    
    ```java
    // Set (in CrawlerThread)
    CrawlingParameterUtil.setCrawlerContext(context);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
Back to top