Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 11 for tika (0.03 seconds)

  1. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        public Logger getLogger() {
            return logger;
        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 3.8K bytes
    - Click Count (0)
  2. src/test/java/org/codelibs/fess/crawler/helper/FessMimeTypeHelperTest.java

            try (InputStream is = new ByteArrayInputStream(SQL_REM_CONTENT.getBytes(StandardCharsets.UTF_8))) {
                final String contentType = mimeTypeHelper.getContentType(is, "test.sql");
                // Without override, Tika detects based on content+filename
                assertNotNull(contentType);
            }
        }
    
        @Test
        public void test_init_nullConfig() throws IOException {
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sat Jan 24 09:06:33 GMT 2026
    - 12.1K bytes
    - Click Count (0)
  3. ADDING_NEW_LANGUAGE.md

    3. **Fallback**: English (from `fess_label.properties` and `fess_message.properties`)
    
    ### Document Language Detection
    
    During crawling and indexing, Fess:
    
    1. Detects language from document content using Apache Tika
    2. Validates against `supported.languages` list
    3. Creates language-specific fields (e.g., `content_ja`, `title_en`, `content_sv`)
    4. Applies language-specific analyzers for better search results
    
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Nov 06 11:36:30 GMT 2025
    - 10.4K bytes
    - Click Count (1)
  4. pom.xml

    			<groupId>com.ibm.icu</groupId>
    			<artifactId>icu4j</artifactId>
    			<version>${icu4j.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-langdetect-optimaize</artifactId>
    			<version>${tika.version}</version>
    			<exclusions>
    				<exclusion>
    					<groupId>javax.annotation</groupId>
    					<artifactId>javax.annotation-api</artifactId>
    				</exclusion>
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Mar 19 07:04:54 GMT 2026
    - 49.9K bytes
    - Click Count (0)
  5. src/main/resources/fess.xml

    		<property name="valueSeparator">"\\n"</property>
    		<property name="roleSeparator">","</property>
    		 -->
    	</component>
    	<component name="tikaConfig" class="org.apache.tika.config.TikaConfig">
    		<arg>org.codelibs.fess.util.ResourceUtil.getConfOrClassesPath("tika.xml")</arg>
    	</component>
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Mar 26 02:24:08 GMT 2026
    - 5.5K bytes
    - Click Count (0)
  6. src/main/java/org/codelibs/fess/helper/LanguageHelper.java

    import java.util.Arrays;
    import java.util.Map;
    import java.util.stream.Collectors;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.apache.tika.language.detect.LanguageDetector;
    import org.apache.tika.language.detect.LanguageResult;
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.mylasta.direction.FessConfig;
    import org.codelibs.fess.util.ComponentUtil;
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 6.9K bytes
    - Click Count (0)
  7. src/main/resources/fess_config.properties

    # Type of hot thread monitoring (e.g., cpu).
    crawler.hotthread.type=cpu
    # Metadata fields to exclude from document content.
    crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*
    # Mapping for document metadata names.
    crawler.metadata.name.mapping=\
    title=title:string\n\
    Title=title:string\n\
    dc:title=title:string\n\
    
    # html
    
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sat Mar 28 06:59:19 GMT 2026
    - 59.3K bytes
    - Click Count (0)
  8. src/main/java/org/codelibs/fess/job/CrawlJob.java

            } else {
                cmdList.add("-Dfess.log.level=" + logLevel);
                if ("debug".equalsIgnoreCase(logLevel)) {
                    cmdList.add("-Dorg.apache.tika.service.error.warn=true");
                }
            }
            stream(fessConfig.getJvmCrawlerOptionsAsArray())
                    .of(stream -> stream.filter(StringUtil::isNotBlank).forEach(value -> cmdList.add(value)));
    
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Nov 28 16:29:12 GMT 2025
    - 19.6K bytes
    - Click Count (0)
  9. src/main/resources/fess_indices/_aws/fess.json

    "taču", "nu", "pat", "tiklab", "iekšpus", "nedz", "tik", "nevis", "turpretim", "jeb", "iekam", "iekām", "iekāms", "kolīdz", "līdzko", "tiklīdz", "jebšu", "tālab", "tāpēc", "nekā", "itin", "jā", "jau", "jel", "nē", "nezin", "tad", "tikai", "vis", "tak", "iekams", "vien", "# modal verbs", "būt  ", "biju ", "biji", "bija", "bijām", "bijāt", "esmu", "esi", "esam", "esat ", "būšu     ", "būsi", "būs", "būsim", "būsiet", "tikt", "tiku", "tiki", "tika", "tikām", "tikāt", "tieku", "tiec", "tiek", "tiekam",...
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sun Mar 15 07:52:55 GMT 2026
    - 117.5K bytes
    - Click Count (0)
  10. src/main/resources/fess_indices/_cloud/fess.json

    "taču", "nu", "pat", "tiklab", "iekšpus", "nedz", "tik", "nevis", "turpretim", "jeb", "iekam", "iekām", "iekāms", "kolīdz", "līdzko", "tiklīdz", "jebšu", "tālab", "tāpēc", "nekā", "itin", "jā", "jau", "jel", "nē", "nezin", "tad", "tikai", "vis", "tak", "iekams", "vien", "# modal verbs", "būt  ", "biju ", "biji", "bija", "bijām", "bijāt", "esmu", "esi", "esam", "esat ", "būšu     ", "būsi", "būs", "būsim", "būsiet", "tikt", "tiku", "tiki", "tika", "tikām", "tikāt", "tieku", "tiec", "tiek", "tiekam",...
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sun Mar 15 07:52:55 GMT 2026
    - 117.5K bytes
    - Click Count (0)
Back to Top