Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 21 - 30 of 158 for extractors (0.07 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

    import org.commonmark.node.Node;
    import org.commonmark.node.Text;
    import org.commonmark.parser.Parser;
    import org.commonmark.renderer.text.TextContentRenderer;
    
    /**
     * Extracts text content and metadata from Markdown files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>YAML front matter metadata extraction</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

        /*
         * (non-Javadoc)
         *
         * @see
         * org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getEncodingPattern()
         */
        @Override
        protected Pattern getEncodingPattern() {
            return metaCharsetPattern;
        }
    
        /*
         * (non-Javadoc)
         *
         * @see org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getTagPattern()
         */
        @Override
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

                dfos = out;
                CopyUtil.copy(in, out);
                out.flush();
    
                Extractor extractor = StringUtil.isBlank(mimeType) ? null : extractorFactory.getExtractor(mimeType);
                if (extractor == null) {
                    final String detectedMimeType = getMimeType(out);
                    extractor = extractorFactory.getExtractor(detectedMimeType);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

                        final Extractor extractor = extractorFactory.getExtractor(mimeType);
                        if (extractor != null) {
                            try {
                                final Map<String, String> map = new HashMap<>();
                                map.put(ExtractData.RESOURCE_NAME_KEY, filename);
                                buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

                        final Extractor extractor = extractorFactory.getExtractor(mimeType);
                        if (extractor != null) {
                            try {
                                final Map<String, String> map = new HashMap<>();
                                map.put(ExtractData.RESOURCE_NAME_KEY, filename);
                                buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 2K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.util.regex.Pattern;
    
    /**
     * Extracts text content from XML documents.
     */
    public class XmlExtractor extends AbstractXmlExtractor {
    
        /**
         * Creates a new XmlExtractor instance.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.Map;
    
    import org.codelibs.core.io.InputStreamUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from an input stream as plain text.
     */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 2K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                try {
                    httpClient.close();
                } catch (final IOException e) {
                    logger.warn("Failed to close HTTP client for API extractor", e);
                }
            }
        }
    
        /**
         * Extracts text from the input stream using the API endpoint.
         *
         * @param in the input stream to extract text from
         * @param params additional parameters
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl;
    import org.dbflute.utflute.core.PlainTestCase;
    
    /**
     * Test class for archive extractor error handling improvements.
     * Tests partial extraction, error recovery, and improved error messages.
     */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.6K bytes
    - Viewed (0)
Back to top