Search Options

Results per page
Sort
Preferred Languages
Advance

Results 41 - 50 of 73 for etext (0.05 sec)

  1. fess-crawler/src/test/resources/html/test1.shtml

    <html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
    <title>タイトル</title>
    </head>
    <body>
    <h1>第一章</h1>
    <h2>第一節</h2>
    <p>ほげほげ<br>ふがふが</p>
    <p>
    <a href="test2.html">LINK</a>
    </p>
    <h1>第2章</h1>
    <h2>第2節</h2>
    </body>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 289 bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractorTest.java

            final Map<String, String> params = new HashMap<String, String>();
            //final ExtractData text = extractor.getText(new ByteArrayInputStream(FileUtils.readFileToByteArray(new File(filePath))), params);
            final ExtractData text = extractor.getText(new ByteArrayInputStream(testStr.getBytes()), params);
            assertEquals(content, text.getContent());
        }
    
        // TODO other tests
    
        static class TestApiExtractorServer {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 5.4K bytes
    - Viewed (0)
  3. fess-crawler/src/test/resources/html/test1.html

    <html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
    <title>タイトル</title>
    </head>
    <body>
    <h1>第一章</h1>
    <h2>第一節</h2>
    <p>ほげほげ<br>ふがふが</p>
    <p>
    <a href="test2.html">LINK</a>
    </p>
    <h1>第2章</h1>
    <h2>第2節</h2>
    </body>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 289 bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

        public static final String FILE_PASSWORDS = "file.passwords";
    
        /** Map containing metadata key-value pairs */
        protected Map<String, String[]> metadata = new HashMap<>();
    
        /** The extracted content text */
        protected String content;
    
        /**
         * Constructs a new ExtractData.
         */
        public ExtractData() {
            // Default constructor
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                    logger.error("Failed to close httpClient.", e);
                }
            }
        }
    
        /**
         * Extracts text from the input stream using the API endpoint.
         *
         * @param in the input stream to extract text from
         * @param params additional parameters
         * @return the extracted data
         * @throws ExtractException if extraction fails
         */
        @Override
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  6. LICENSE

          (d) If the Work includes a "NOTICE" text file as part of its
              distribution, then any Derivative Works that You distribute must
              include a readable copy of the attribution notices contained
              within such NOTICE file, excluding those notices that do not
              pertain to any part of the Derivative Works, in at least one
              of the following places: within a NOTICE text file distributed
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Mon Jan 11 04:26:17 UTC 2021
    - 11.1K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.util.regex.Pattern;
    
    /**
     * Extracts text content from XML documents.
     */
    public class XmlExtractor extends AbstractXmlExtractor {
    
        /**
         * Creates a new XmlExtractor instance.
         */
        public XmlExtractor() {
            super();
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/storage/StorageClientTest.java

            try (final ResponseData responseData = storageClient.doGet("storage://fess/file1.txt")) {
                assertEquals("storage://fess/file1.txt", responseData.getUrl());
                assertEquals("text/plain", responseData.getMimeType());
                assertEquals("file1", new String(InputStreamUtil.getBytes(responseData.getResponseBody())));
                assertEquals(5, responseData.getContentLength());
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 13.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

     * It uses XPath expressions to extract text content from HTML documents.
     * <p>
     * This class provides methods to configure the XPath expressions, parser features, and properties.
     * It also includes caching mechanism for XPathAPI instances to improve performance.
     * </p>
     * <p>
     * The extracted text is obtained from the nodes selected by the {@code targetNodePath} XPath expression.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

                        PdfExtractor pdfExtractor = container.getComponent("pdfExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
                        factory.addExtractor("application/pdf", pdfExtractor);
                    });
            pdfExtractor = container.getComponent("pdfExtractor");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 7.6K bytes
    - Viewed (0)
Back to top