Search Options

Results per page
Sort
Preferred Languages
Advance

Results 11 - 20 of 22 for tikai (0.05 sec)

  1. src/main/java/org/codelibs/fess/helper/LanguageHelper.java

    import java.util.Arrays;
    import java.util.Map;
    import java.util.stream.Collectors;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.apache.tika.language.detect.LanguageDetector;
    import org.apache.tika.language.detect.LanguageResult;
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.mylasta.direction.FessConfig;
    import org.codelibs.fess.util.ComponentUtil;
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 6.9K bytes
    - Viewed (0)
  2. pom.xml

    			<groupId>com.ibm.icu</groupId>
    			<artifactId>icu4j</artifactId>
    			<version>${icu4j.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-langdetect-optimaize</artifactId>
    			<version>${tika.version}</version>
    			<exclusions>
    				<exclusion>
    					<groupId>javax.annotation</groupId>
    					<artifactId>javax.annotation-api</artifactId>
    				</exclusion>
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Sep 04 05:22:58 UTC 2025
    - 49.6K bytes
    - Viewed (0)
  3. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    				"application/x-texinfo",
    				"application/x-tika-msoffice",
    				"application/x-tika-msoffice-embedded",
    				"application/x-tika-msoffice-embedded;format=ole10_native",
    				"application/x-tika-msoffice-embedded;format=comp_obj",
    				"application/x-tika-msworks-spreadsheet",
    				"application/x-tika-ooxml",
    				"application/x-tika-ooxml-protected",
    				"application/x-tika-staroffice",
    				"application/x-uc2-compressed",
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Aug 01 21:40:30 UTC 2020
    - 49K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        public Logger getLogger() {
            return logger;
        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  5. README.md

    ## Technology Stack
    
    - **Java**: 21+ (requires Java 21 or higher)
    - **Build System**: Maven 3.x
    - **DI Container**: LastaFlute DI
    - **HTTP Client**: Apache HttpComponents
    - **Content Extraction**: Apache Tika, Apache POI, PDFBox
    - **Testing**: JUnit 4, UTFlute, Testcontainers
    - **Storage Backends**: OpenSearch, Memory-based
    
    ## Quick Start
    
    ### Prerequisites
    
    - Java 21 or higher
    - Maven 3.6 or higher
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/job/CrawlJob.java

            } else {
                cmdList.add("-Dfess.log.level=" + logLevel);
                if ("debug".equalsIgnoreCase(logLevel)) {
                    cmdList.add("-Dorg.apache.tika.service.error.warn=true");
                }
            }
            stream(fessConfig.getJvmCrawlerOptionsAsArray())
                    .of(stream -> stream.filter(StringUtil::isNotBlank).forEach(value -> cmdList.add(value)));
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 19.6K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
        // TODO tika needs to support pdfbox 2.0
        //    public void test_getTika_pdf() {
        //        final InputStream in = ResourceUtil
        //                .getResourceAsStream("extractor/test.pdf");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  8. okhttp/src/jvmMain/resources/okhttp3/internal/publicsuffix/PublicSuffixDatabase.list

    toei.aichi.jp
    toga.toyama.jp
    togakushi.nagano.jp
    togane.chiba.jp
    togitsu.nagasaki.jp
    togliatti.su
    togo.aichi.jp
    togura.nagano.jp
    tohma.hokkaido.jp
    tohnosho.chiba.jp
    toho.fukuoka.jp
    tokai.aichi.jp
    tokai.ibaraki.jp
    tokamachi.niigata.jp
    tokashiki.okinawa.jp
    toki.gifu.jp
    tokigawa.saitama.jp
    tokke.no
    tokoname.aichi.jp
    tokorozawa.saitama.jp
    tokushima.jp
    tokushima.tokushima.jp
    Registered: Fri Sep 05 11:42:10 UTC 2025
    - Last Modified: Tue May 27 22:00:49 UTC 2025
    - 129.6K bytes
    - Viewed (3)
  9. src/main/resources/fess_config.properties

    # Type of hot thread monitoring (e.g., cpu).
    crawler.hotthread.type=cpu
    # Metadata fields to exclude from document content.
    crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*
    # Mapping for document metadata names.
    crawler.metadata.name.mapping=\
    title=title:string\n\
    Title=title:string\n\
    dc:title=title:string\n\
    
    # html
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Sat Jul 05 14:45:37 UTC 2025
    - 54.7K bytes
    - Viewed (0)
  10. okhttp/src/androidMain/assets/PublicSuffixDatabase.list

    toei.aichi.jp
    toga.toyama.jp
    togakushi.nagano.jp
    togane.chiba.jp
    togitsu.nagasaki.jp
    togliatti.su
    togo.aichi.jp
    togura.nagano.jp
    tohma.hokkaido.jp
    tohnosho.chiba.jp
    toho.fukuoka.jp
    tokai.aichi.jp
    tokai.ibaraki.jp
    tokamachi.niigata.jp
    tokashiki.okinawa.jp
    toki.gifu.jp
    tokigawa.saitama.jp
    tokke.no
    tokoname.aichi.jp
    tokorozawa.saitama.jp
    tokushima.jp
    tokushima.tokushima.jp
    Registered: Fri Sep 05 11:42:10 UTC 2025
    - Last Modified: Tue Dec 31 14:50:53 UTC 2024
    - 129.6K bytes
    - Viewed (2)
Back to top