- Sort Score
- Result 10 results
- Languages All
Results 21 - 30 of 158 for extractors (0.04 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
import org.commonmark.node.Node; import org.commonmark.node.Text; import org.commonmark.parser.Parser; import org.commonmark.renderer.text.TextContentRenderer; /** * Extracts text content and metadata from Markdown files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul> * <li>YAML front matter metadata extraction</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
/* * (non-Javadoc) * * @see * org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getEncodingPattern() */ @Override protected Pattern getEncodingPattern() { return metaCharsetPattern; } /* * (non-Javadoc) * * @see org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getTagPattern() */ @Override
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Oct 04 08:47:19 UTC 2025 - 9.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java
dfos = out; CopyUtil.copy(in, out); out.flush(); Extractor extractor = StringUtil.isBlank(mimeType) ? null : extractorFactory.getExtractor(mimeType); if (extractor == null) { final String detectedMimeType = getMimeType(out); extractor = extractorFactory.getExtractor(detectedMimeType);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java
final Extractor extractor = extractorFactory.getExtractor(mimeType); if (extractor != null) { try { final Map<String, String> map = new HashMap<>(); map.put(ExtractData.RESOURCE_NAME_KEY, filename); buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 4.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java
final Extractor extractor = extractorFactory.getExtractor(mimeType); if (extractor != null) { try { final Map<String, String> map = new HashMap<>(); map.put(ExtractData.RESOURCE_NAME_KEY, filename); buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.util.regex.Pattern; /** * Extracts text content from XML documents. */ public class XmlExtractor extends AbstractXmlExtractor { /** * Creates a new XmlExtractor instance. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.Map; import org.codelibs.core.io.InputStreamUtil; import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content from an input stream as plain text. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java
import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl; import org.dbflute.utflute.core.PlainTestCase; /** * Test class for archive extractor error handling improvements. * Tests partial extraction, error recovery, and improved error messages. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.6K bytes - Viewed (0)