Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 91 for PDF$ (0.13 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

    /**
     * PdfExtractor extracts text content from PDF files using Apache PDFBox.
     * It supports password-protected PDFs and can extract embedded documents and annotations.
     *
     * <p>The extractor runs text extraction in a separate thread with a configurable timeout
     * to prevent hanging on problematic PDF files. It also extracts metadata from the PDF
     * document and includes it in the extraction result.
     *
     * <p>Features:
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

            url = "http://test.com/hoge1.pdf";
            resourceName = null;
            params.put(ExtractData.URL, url);
            params.put(ExtractData.RESOURCE_NAME_KEY, resourceName);
            assertNull(pdfExtractor.getPassword(params));
    
            url = "http://test.com/hoge1.pdf";
            resourceName = "hoge2.pdf";
            params.put(ExtractData.URL, url);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 7.6K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            url = "http://test.com/hoge1.pdf";
            resourceName = null;
            assertNull(tikaExtractor.getPassword(createParams(url, resourceName)));
    
            url = "http://test.com/hoge1.pdf";
            resourceName = "hoge2.pdf";
            assertNull(tikaExtractor.getPassword(createParams(url, resourceName)));
    
            url = null;
            resourceName = "hoge2.pdf";
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/ExtractDataTest.java

            data.putValue(ExtractData.RESOURCE_NAME_KEY, "test.pdf");
            data.putValue(ExtractData.URL, "https://example.com/test.pdf");
            data.putValues(ExtractData.FILE_PASSWORDS, new String[] { "pass1", "pass2" });
    
            assertEquals("test.pdf", data.getValues(ExtractData.RESOURCE_NAME_KEY)[0]);
            assertEquals("https://example.com/test.pdf", data.getValues(ExtractData.URL)[0]);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 9.9K bytes
    - Viewed (0)
  5. docs/debugging/README.md

            "fe012443-6ba9-4ef2-bb94-b729d2060c78/test123/testw3c.pdf/xl.meta": {"Versions":[{"Type":1,"V2Obj":{"ID":"aGEA/ZUOR4ueRIZsAgfDqA==","DDir":"9MMwM47bS+K6KvQqN3hlDw==","EcAlgo":1,"EcM":2,"EcN":2,"EcBSize":1048576,"EcIndex":1,"EcDist":[4,1,2,3],"CSumAl...
    Registered: Sun Dec 28 19:28:13 UTC 2025
    - Last Modified: Tue Aug 12 18:20:36 UTC 2025
    - 8.6K bytes
    - Viewed (0)
  6. src/test/java/jcifs/smb1/util/MimeMapTest.java

            void testCaseInsensitiveExtensions() throws IOException {
                assertEquals("application/pdf", mimeMap.getMimeType("PDF"));
                assertEquals("application/pdf", mimeMap.getMimeType("Pdf"));
                assertEquals("application/pdf", mimeMap.getMimeType("pDf"));
                assertEquals("text/html", mimeMap.getMimeType("HTML"));
                assertEquals("text/html", mimeMap.getMimeType("HtMl"));
    Registered: Sat Dec 20 13:44:44 UTC 2025
    - Last Modified: Thu Aug 14 05:31:44 UTC 2025
    - 9.1K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java

                    "hoge.pptx");
    
            assertContentType("image/jpeg", null, "hoge.jpg");
            assertContentType("image/gif", null, "hoge.gif");
    
            assertContentType("application/pdf", "extractor/test.pdf", "hoge.pdf");
    
            assertContentType("application/gzip", "extractor/gz/test.tar.gz", "hoge.tar.gz");
            assertContentType("application/zip", "extractor/zip/test.zip", "hoge.zip");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 11.6K bytes
    - Viewed (0)
  8. okhttp/src/jvmTest/kotlin/okhttp3/MultipartBodyTest.kt

          |Content-Type: application/pdf; charset=utf-8
          |
          |Jesse’s Resumé
          |--AaB03x--
          |
          """.trimMargin().replace("\n", "\r\n")
        val body =
          MultipartBody
            .Builder("AaB03x")
            .setType(MultipartBody.FORM)
            .addFormDataPart(
              "attachment",
              "resumé.pdf",
    Registered: Fri Dec 26 11:42:13 UTC 2025
    - Last Modified: Wed Mar 19 19:25:20 UTC 2025
    - 10.5K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractorEnhancedTest.java

            final Map<String, String> params = new HashMap<>();
            params.put(ExtractData.RESOURCE_NAME_KEY, "test-document.pdf");
    
            final ExtractData result = filenameExtractor.getText(in, params);
    
            assertNotNull(result);
            assertEquals("test-document.pdf", result.getContent());
        }
    
        /**
         * Test extraction with null parameters map.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 7K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            }
    
            // Test WildcardBot - wildcard patterns
            // Disallow: /*.pdf$ - should block .pdf files but not .pdf with query params
            assertFalse(robotsTxt.allows("/document.pdf", "WildcardBot"));
            assertFalse(robotsTxt.allows("/files/report.pdf", "WildcardBot"));
            assertTrue(robotsTxt.allows("/document.pdf?download=true", "WildcardBot")); // $ means exact end
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
Back to top