Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 41 - 49 of 49 for ExtractData (0.06 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

        public TarExtractor() {
            super();
        }
    
        /**
         * Validates the given stream and wraps the text produced by
         * {@code getTextInternal} in a new {@link ExtractData}.
         *
         * @param in the input stream; it is passed to {@code validateInputStream} first
         * @param params extraction parameters (not read by this method)
         * @return a new {@link ExtractData} built from the result of {@code getTextInternal}
         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            validateInputStream(in);

            // Arguments are evaluated left to right: the MIME type helper is
            // looked up first, then the extractor factory.
            return new ExtractData(getTextInternal(in, getMimeTypeHelper(), getExtractorFactory()));
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java

         * @return The password.
         */
        protected String getPassword(final Map<String, String> params) {
            final String url = params != null ? params.get(ExtractData.URL) : null;
            if (!passwordMap.isEmpty()) {
                final String resourceName = params != null ? params.get(ExtractData.RESOURCE_NAME_KEY) : null;
    
                String value = null;
                if (StringUtil.isNotEmpty(url)) {
                    value = url;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java

    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.dbflute.utflute.core.PlainTestCase;
    
    /**
     * @author shinsuke
     *
     */
    public class HtmlExtractorTest extends PlainTestCase {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3.7K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                accessTimeoutTask = TimeoutManager.getInstance().addTimeoutTarget(accessTimeoutTarget, accessTimeout, false);
            }
    
            final ExtractData data = new ExtractData();
            final HttpPost httpPost = new HttpPost(url);
            final HttpEntity postEntity = MultipartEntityBuilder.create()
                    .setMode(HttpMultipartMode.BROWSER_COMPATIBLE)
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java

            }
            final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            final Map<String, String> params = new HashMap<>();
            params.put(ExtractData.RESOURCE_NAME_KEY, getResourceName(responseData));
            params.put(ExtractData.CONTENT_TYPE, responseData.getMimeType());
            String content = null;
            try (final InputStream in = responseData.getResponseBody()) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import java.util.concurrent.TimeUnit;
    import java.util.regex.Pattern;
    
    import javax.xml.xpath.XPathNodes;
    
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractorTest.java

            private InputStream lastValidatedStream = null;
    
            /**
             * Test stub: validates the stream, records that validation ran and which
             * stream was checked, then returns fixed content.
             *
             * @param in the stream handed to {@code validateInputStream}
             * @param params extraction parameters (not read by this method)
             * @return a new {@link ExtractData} holding the literal "test content"
             */
            @Override
            public ExtractData getText(final InputStream in, final Map<String, String> params) {
                validateInputStream(in);

                // Reached only when validateInputStream returned normally.
                lastValidatedStream = in;
                validateCalled = true;

                return new ExtractData("test content");
            }
    
            public boolean isValidateCalled() {
                return validateCalled;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 8.4K bytes
    - Viewed (0)
  8. CLAUDE.md

    ### Adding a Content Extractor
    
    1. **Implement `Extractor`**:
    ```java
    /** Example extractor that returns an ExtractData carrying the extracted text. */
    public class MyExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(InputStream in, Map<String, String> params) {
            // Extract text; `extractedText` is a placeholder not defined in this snippet.
            ExtractData result = new ExtractData();
            result.setContent(extractedText);
            return result;
        }
    }
    ```
    
    2. **Register**:
    ```xml
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  9. README.md

    ```java
    /** Skeleton of a custom extractor built on AbstractExtractor. */
    public class CustomExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(final InputStream inputStream, final Map<String, String> params) {
            // Custom extraction logic
            final ExtractData result = new ExtractData();
            // ... implementation
            return result;
        }
    }
    
    // Register custom extractor
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
Back to top