Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 24 for PDF (0.01 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            url = "http://test.com/hoge1.pdf";
            resourceName = null;
            assertNull(tikaExtractor.getPassword(createParams(url, resourceName)));
    
            url = "http://test.com/hoge1.pdf";
            resourceName = "hoge2.pdf";
            assertNull(tikaExtractor.getPassword(createParams(url, resourceName)));
    
            url = null;
            resourceName = "hoge2.pdf";
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

    /**
     * PdfExtractor extracts text content from PDF files using Apache PDFBox.
     * It supports password-protected PDFs and can extract embedded documents and annotations.
     *
     * <p>The extractor runs text extraction in a separate thread with a configurable timeout
     * to prevent hanging on problematic PDF files. It also extracts metadata from the PDF
     * document and includes it in the extraction result.
     *
     * <p>Features:
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

            url = "http://test.com/hoge1.pdf";
            resourceName = null;
            params.put(ExtractData.URL, url);
            params.put(ExtractData.RESOURCE_NAME_KEY, resourceName);
            assertNull(pdfExtractor.getPassword(params));
    
            url = "http://test.com/hoge1.pdf";
            resourceName = "hoge2.pdf";
            params.put(ExtractData.URL, url);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 7.6K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java

                    "hoge.pptx");
    
            assertContentType("image/jpeg", null, "hoge.jpg");
            assertContentType("image/gif", null, "hoge.gif");
    
            assertContentType("application/pdf", "extractor/test.pdf", "hoge.pdf");
    
            assertContentType("application/gzip", "extractor/gz/test.tar.gz", "hoge.tar.gz");
            assertContentType("application/zip", "extractor/zip/test.zip", "hoge.zip");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 11.6K bytes
    - Viewed (0)
  5. docs/debugging/README.md

            "fe012443-6ba9-4ef2-bb94-b729d2060c78/test123/testw3c.pdf/xl.meta": {"Versions":[{"Type":1,"V2Obj":{"ID":"aGEA/ZUOR4ueRIZsAgfDqA==","DDir":"9MMwM47bS+K6KvQqN3hlDw==","EcAlgo":1,"EcM":2,"EcN":2,"EcBSize":1048576,"EcIndex":1,"EcDist":[4,1,2,3],"CSumAl...
    Registered: Sun Sep 07 19:28:11 UTC 2025
    - Last Modified: Tue Aug 12 18:20:36 UTC 2025
    - 8.6K bytes
    - Viewed (0)
  6. cmd/erasure-sets_test.go

    	testCases := []struct {
    		objectName string
    		sipHash    int
    	}{
    		// cases which should pass the test.
    		// passing in valid object name.
    		{"object", 37},
    		{"The Shining Script <v1>.pdf", 38},
    		{"Cost Benefit Analysis (2009-2010).pptx", 59},
    		{"117Gn8rfHL2ACARPAhaFd0AGzic9pUbIA/5OCn5A", 35},
    		{"SHØRT", 49},
    		{"There are far too many object names, and far too few bucket names!", 8},
    		{"a/b/c/", 159},
    Registered: Sun Sep 07 19:28:11 UTC 2025
    - Last Modified: Fri Aug 29 02:39:48 UTC 2025
    - 6.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

            // Presentation Formats
            extensionMap.put("odp", "pdf");
            extensionMap.put("otp", "pdf");
            extensionMap.put("sxi", "pdf");
            extensionMap.put("ppt", "pdf");
            extensionMap.put("pptx", "pdf");
            // Drawing Formats
            extensionMap.put("odg", "svg");
            extensionMap.put("otg", "svg");
    
            extractorMap.put("pdf", new PdfExtractor());
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

     * </p>
     *
     * <p>
     * Example usage:
     * </p>
     *
     * <pre>
     * {@code
     * try (InputStream in = new FileInputStream("example.pdf")) {
     *     ExtractData extractData = new ExtractorBuilder(crawlerContainer, in, new HashMap<>())
     *         .mimeType("application/pdf")
     *         .filename("example.pdf")
     *         .maxContentLength(1024 * 1024)
     *         .extract();
     *
     *     String content = extractData.getContent();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            String sessionId = "test-session-019";
            urlFilter.init(sessionId);
    
            urlFilter.addInclude(".*\\.PDF$");
    
            // Test case sensitivity
            assertFalse(urlFilter.match("https://example.com/document.pdf"));
            assertTrue(urlFilter.match("https://example.com/document.PDF"));
        }
    
        /**
         * Test very long URL handling
         */
        public void test_veryLongUrl() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

     *   <li>Maximum term sizes for alphanumeric and symbolic terms</li>
     *   <li>Custom Tika configuration</li>
     *   <li>Tesseract OCR configuration for image-based documents</li>
     *   <li>PDF Parser configuration for PDF documents</li>
     * </ul>
     *
     * <p>
     * The {@link TikaDetectParser} inner class extends {@link CompositeParser} to provide auto-detection of the MIME type
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
Back to top