- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 22 for tika (0.09 sec)
-
fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml
<!-- an OLE2 (application/x-tika-msoffice) container. --> <!-- They are logically subclasses of (application/x-tika-ooxml), but their containers are literally subclasses of (application/x-tika-msoffice) --> <mime-type type="application/x-tika-ooxml-protected"> <sub-class-of type="application/x-tika-msoffice"/> <_comment>Password Protected OOXML File</_comment>
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Sep 21 06:46:43 UTC 2023 - 298.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java
import java.util.Set; import org.apache.tika.metadata.ClimateForcast; import org.apache.tika.metadata.CreativeCommons; import org.apache.tika.metadata.Geographic; import org.apache.tika.metadata.HttpHeaders; import org.apache.tika.metadata.Message; import org.apache.tika.metadata.TIFF; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.TikaMimeKeys; import org.apache.tika.parser.pdf.PDFParser; /**
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 2.5K bytes - Viewed (0) -
fess-crawler/pom.xml
<artifactId>tika-parser-html-module</artifactId> <version>${tika.version}</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-parser-image-module</artifactId> <version>${tika.version}</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-parser-mail-module</artifactId>
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:47:13 UTC 2024 - 11.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImpl.java
import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MimeTypes; import org.apache.tika.mime.MimeTypesFactory; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.entity.ExtractData;
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 4.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
import org.apache.commons.lang3.SystemUtils; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata;
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sat Oct 12 01:41:37 UTC 2024 - 25K bytes - Viewed (0) -
src/main/resources/fess.xml
<property name="valueSeparator">"\\n"</property> <property name="roleSeparator">","</property> --> </component> <component name="tikaConfig" class="org.apache.tika.config.TikaConfig"> <arg>org.codelibs.fess.util.ResourceUtil.getConfOrClassesPath("tika.xml")</arg> </component>
Registered: Thu Oct 31 13:40:30 UTC 2024 - Last Modified: Sun Jul 28 09:03:48 UTC 2024 - 5.2K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/LanguageHelper.java
import java.util.Arrays; import java.util.Map; import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.tika.language.detect.LanguageDetector; import org.apache.tika.language.detect.LanguageResult; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil;
Registered: Thu Oct 31 13:40:30 UTC 2024 - Last Modified: Thu Feb 22 01:53:18 UTC 2024 - 5.5K bytes - Viewed (0) -
pom.xml
<groupId>com.ibm.icu</groupId> <artifactId>icu4j</artifactId> <version>${icu4j.version}</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-langdetect-optimaize</artifactId> <version>${tika.version}</version> <exclusions> <exclusion> <groupId>javax.annotation</groupId> <artifactId>javax.annotation-api</artifactId> </exclusion>
Registered: Thu Oct 31 13:40:30 UTC 2024 - Last Modified: Sat Oct 26 02:16:03 UTC 2024 - 49.1K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/job/CrawlJob.java
Registered: Thu Oct 31 13:40:30 UTC 2024 - Last Modified: Sun Jun 23 04:13:47 UTC 2024 - 15.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java
final String content = extractData.getContent(); CloseableUtil.closeQuietly(in); logger.info(content); assertTrue(content.contains("テスト")); } // TODO tika needs to support pdfbox 2.0 // public void test_getTika_pdf() { // final InputStream in = ResourceUtil // .getResourceAsStream("extractor/test.pdf");
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 30.5K bytes - Viewed (0)