Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 31 - 36 of 36 for ExtractData (0.2 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets text from a Microsoft Publisher file.
     *
     * @author shinsuke
     *
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

        }
    
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            if (in == null) {
                throw new CrawlerSystemException("The inputstream is null.");
            }
    
            final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
            final ExtractorFactory extractorFactory = getExtractorFactory();
            return new ExtractData(getTextInternal(in, mimeTypeHelper, extractorFactory));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 5K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java

         * @return The password.
         */
        protected String getPassword(final Map<String, String> params) {
            final String url = params != null ? params.get(ExtractData.URL) : null;
            if (!passwordMap.isEmpty()) {
                final String resourceName = params != null ? params.get(ExtractData.RESOURCE_NAME_KEY) : null;
    
                String value = null;
                if (StringUtil.isNotEmpty(url)) {
                    value = url;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java

    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.dbflute.utflute.core.PlainTestCase;
    
    /**
     * @author shinsuke
     *
     */
    public class HtmlExtractorTest extends PlainTestCase {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3.7K bytes
    - Viewed (0)
  5. README.md

    ```java
    /**
     * Example of a user-defined extractor built on {@code AbstractExtractor}.
     *
     * This is a skeleton: the actual extraction logic is left for the
     * implementer to fill in.
     */
    public class CustomExtractor extends AbstractExtractor {
        /**
         * Builds the extraction result for the given input.
         *
         * @param inputStream the content to extract from
         * @param params additional parameters passed to the extractor
         * @return a newly created {@code ExtractData} instance
         */
        @Override
        public ExtractData getText(final InputStream inputStream, final Map<String, String> params) {
            // Start from an empty result; custom extraction logic populates it.
            ExtractData result = new ExtractData();
            // ... implementation
            return result;
        }
    }
    
    // Register custom extractor
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import java.util.concurrent.TimeUnit;
    import java.util.regex.Pattern;
    
    import javax.xml.xpath.XPathNodes;
    
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
Back to top