Search Options

Results per page
Sort
Preferred Languages
Advance

Results 11 - 20 of 29 for PDF (0.01 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

     * </p>
     *
     * <p>
     * Example usage:
     * </p>
     *
     * <pre>
     * {@code
     * try (InputStream in = new FileInputStream("example.pdf")) {
     *     ExtractData extractData = new ExtractorBuilder(crawlerContainer, in, new HashMap<>())
     *         .mimeType("application/pdf")
     *         .filename("example.pdf")
     *         .maxContentLength(1024 * 1024)
     *         .extract();
     *
     *     String content = extractData.getContent();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            String sessionId = "test-session-019";
            urlFilter.init(sessionId);
    
            urlFilter.addInclude(".*\\.PDF$");
    
            // Test case sensitivity
            assertFalse(urlFilter.match("https://example.com/document.pdf"));
            assertTrue(urlFilter.match("https://example.com/document.PDF"));
        }
    
        /**
         * Test very long URL handling
         */
        public void test_veryLongUrl() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

     *   <li>Maximum term sizes for alphanumeric and symbolic terms</li>
     *   <li>Custom Tika configuration</li>
     *   <li>Tesseract OCR configuration for image-based documents</li>
     *   <li>PDF Parser configuration for PDF documents</li>
     * </ul>
     *
     * <p>
     * The {@link TikaDetectParser} inner class extends {@link CompositeParser} to provide auto-detection of the MIME type
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java

            extractorFactory.addExtractor("application/vnd.ms-powerpoint", tikaExtractor);
            extractorFactory.addExtractor("application/vnd.visio", tikaExtractor);
            extractorFactory.addExtractor("application/pdf", pdfExtractor);
            extractorFactory.addExtractor("application/x-lha", lhaExtractor);
            extractorFactory.addExtractor("application/x-lharc", lhaExtractor);
    
        }
    
        public void test_addExtractor() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.9K bytes
    - Viewed (0)
  5. docs/compression/README.md

    ```
    
    Default config includes most common highly compressible content extensions and mime-types.
    
    ```bash
    ~ mc admin config set myminio compression extensions=".pdf" mime_types="application/pdf"
    ```
    
    To show help on setting compression config values.
    
    ```bash
    ~ mc admin config set myminio compression
    ```
    
    To enable compression for all content, no matter the extension and content type
    Registered: Sun Sep 07 19:28:11 UTC 2025
    - Last Modified: Tue Aug 12 18:20:36 UTC 2025
    - 5.2K bytes
    - Viewed (0)
  6. README.md

    crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second
    ```
    
    ### URL Filtering
    
    ```java
    // Include patterns
    crawler.urlFilter.addInclude("https://example.com/.*");
    crawler.urlFilter.addInclude(".*\\.pdf$");
    
    // Exclude patterns  
    crawler.urlFilter.addExclude(".*\\.js$");
    crawler.urlFilter.addExclude(".*login.*");
    ```
    
    ## Supported Protocols and Formats
    
    ### Protocols
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  7. cmd/erasure-metadata-utils_test.go

    		hashedOrder []int
    	}{
    		// cases which should pass the test.
    		// passing in valid object name.
    		{"object", []int{14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}},
    		{"The Shining Script <v1>.pdf", []int{16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}},
    		{"Cost Benefit Analysis (2009-2010).pptx", []int{15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}},
    Registered: Sun Sep 07 19:28:11 UTC 2025
    - Last Modified: Fri Aug 29 02:39:48 UTC 2025
    - 7.3K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractorTest.java

                        TikaExtractor tikaExtractor = container.getComponent("tikaExtractor");
                        factory.addExtractor("application/pdf", tikaExtractor);
                    });
            emlExtractor = container.getComponent("emlExtractor");
        }
    
        public void test_getText() throws IOException {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 4.6K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleManagerTest.java

            UrlPatternRule rule1 = new UrlPatternRule("httpRule", "https?://.*");
            UrlPatternRule rule2 = new UrlPatternRule("exampleRule", ".*example\\.com.*");
            UrlPatternRule rule3 = new UrlPatternRule("pdfRule", ".*\\.pdf$");
    
            ruleManager.addRule(rule1);
            ruleManager.addRule(rule2);
            ruleManager.addRule(rule3);
    
            ResponseData responseData1 = new ResponseData();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 23.8K bytes
    - Viewed (0)
  10. fess-crawler/pom.xml

    			<artifactId>tika-parser-ocr-module</artifactId>
    			<version>${tika.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-parser-pdf-module</artifactId>
    			<version>${tika.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-parser-pkg-module</artifactId>
    			<version>${tika.version}</version>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.3K bytes
    - Viewed (0)
Back to top