- Sort Score
- Num 10 results
- Language All
Results 1 - 10 of 17 for tika (0.02 seconds)
-
fess-crawler/pom.xml
<artifactId>tika-parser-html-module</artifactId> <version>${tika.version}</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-parser-image-module</artifactId> <version>${tika.version}</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-parser-mail-module</artifactId>
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Sun Mar 29 01:35:48 GMT 2026 - 12.5K bytes - Click Count (0) -
fess-crawler-lasta/src/main/resources/crawler/extractor.xml
"application/x-js-taro", "application/x-tika-msoffice", "application/x-tika-msoffice-embedded", "application/x-tika-msoffice-embedded;format=ole10_native", "application/x-tika-msoffice-embedded;format=comp_obj", "application/x-tika-msworks-spreadsheet", "application/x-tika-ooxml", "application/x-tika-ooxml-protected", "application/x-tika-staroffice", "application/x-uc2-compressed",
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Wed Feb 11 01:15:55 GMT 2026 - 50.4K bytes - Click Count (0) -
CLAUDE.md
- **License**: Apache 2.0 - **DI**: LastaFlute DI - **Repo**: https://github.com/codelibs/fess-crawler ### Tech Stack - **HTTP**: Apache HttpComponents 4.5+ and 5.x (switchable) - **Extraction**: Apache Tika, POI, PDFBox - **Testing**: JUnit 4, UTFlute, Mockito, Testcontainers - **Storage**: In-memory (default), OpenSearch (optional) - **Cloud**: AWS SDK v2 (S3), Google Cloud Storage ### Protocols
Created: Sun Apr 12 03:50:13 GMT 2026 - Last Modified: Thu Mar 12 03:39:20 GMT 2026 - 8.1K bytes - Click Count (0) -
src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java
public Logger getLogger() { return logger; } /** * Gets the appropriate extractor for the given response data. * Selects an extractor based on the MIME type or falls back to the Tika extractor. * * @param responseData the response data containing the document to extract * @return the extractor instance for processing the document
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Fri Nov 28 16:29:12 GMT 2025 - 3.8K bytes - Click Count (0) -
src/test/java/org/codelibs/fess/crawler/helper/FessMimeTypeHelperTest.java
try (InputStream is = new ByteArrayInputStream(SQL_REM_CONTENT.getBytes(StandardCharsets.UTF_8))) { final String contentType = mimeTypeHelper.getContentType(is, "test.sql"); // Without override, Tika detects based on content+filename assertNotNull(contentType); } } @Test public void test_init_nullConfig() throws IOException {
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Sat Jan 24 09:06:33 GMT 2026 - 12.1K bytes - Click Count (0) -
pom.xml
<groupId>com.ibm.icu</groupId> <artifactId>icu4j</artifactId> <version>${icu4j.version}</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-langdetect-optimaize</artifactId> <version>${tika.version}</version> <exclusions> <exclusion> <groupId>javax.annotation</groupId> <artifactId>javax.annotation-api</artifactId> </exclusion>
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Thu Mar 19 07:04:54 GMT 2026 - 49.9K bytes - Click Count (0) -
src/main/resources/fess.xml
<property name="valueSeparator">"\\n"</property> <property name="roleSeparator">","</property> --> </component> <component name="tikaConfig" class="org.apache.tika.config.TikaConfig"> <arg>org.codelibs.fess.util.ResourceUtil.getConfOrClassesPath("tika.xml")</arg> </component>
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Thu Mar 26 02:24:08 GMT 2026 - 5.5K bytes - Click Count (0) -
src/main/java/org/codelibs/fess/helper/LanguageHelper.java
import java.util.Arrays; import java.util.Map; import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.tika.language.detect.LanguageDetector; import org.apache.tika.language.detect.LanguageResult; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil;
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Fri Nov 28 16:29:12 GMT 2025 - 6.9K bytes - Click Count (0) -
src/main/resources/fess_config.properties
# Type of hot thread monitoring (e.g., cpu). crawler.hotthread.type=cpu # Metadata fields to exclude from document content. crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* # Mapping for document metadata names. crawler.metadata.name.mapping=\ title=title:string\n\ Title=title:string\n\ dc:title=title:string\n\ # html
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Sat Mar 28 06:59:19 GMT 2026 - 59.3K bytes - Click Count (0) -
src/main/java/org/codelibs/fess/job/CrawlJob.java
Created: Tue Mar 31 13:07:34 GMT 2026 - Last Modified: Fri Nov 28 16:29:12 GMT 2025 - 19.6K bytes - Click Count (0)