etext - Code Search

fess-crawler/src/test/resources/html/test1.shtml

<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>タイトル</title>
</head>
<body>
<h1>第一章</h1>
<h2>第一節</h2>
<p>ほげほげ<br>ふがふが</p>
<p>
<a href="test2.html">LINK</a>
</p>
<h1>第2章</h1>
<h2>第2節</h2>
</body>

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Oct 11 02:16:55 UTC 2015

- 289 bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractorTest.java

        final Map<String, String> params = new HashMap<String, String>();
        //final ExtractData text = extractor.getText(new ByteArrayInputStream(FileUtils.readFileToByteArray(new File(filePath))), params);
        final ExtractData text = extractor.getText(new ByteArrayInputStream(testStr.getBytes()), params);
        assertEquals(content, text.getContent());
    }

    // TODO other tests

    static class TestApiExtractorServer {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 5.4K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/resources/html/test1.html

<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>タイトル</title>
</head>
<body>
<h1>第一章</h1>
<h2>第一節</h2>
<p>ほげほげ<br>ふがふが</p>
<p>
<a href="test2.html">LINK</a>
</p>
<h1>第2章</h1>
<h2>第2節</h2>
</body>

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Oct 11 02:16:55 UTC 2015

- 289 bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

    public static final String FILE_PASSWORDS = "file.passwords";

    /** Map containing metadata key-value pairs */
    protected Map<String, String[]> metadata = new HashMap<>();

    /** The extracted content text */
    protected String content;

    /**
     * Constructs a new ExtractData.
     * <p>
     * The constructor body performs no work: {@code metadata} keeps the empty
     * {@link HashMap} assigned by its field initializer, and {@code content}
     * has no initializer, so it is not set by construction.
     * </p>
     */
    public ExtractData() {
        // Default constructor
    }

    /**

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Sep 06 04:15:37 UTC 2025

- 3.8K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                logger.error("Failed to close httpClient.", e);
            }
        }
    }

    /**
     * Extracts text from the input stream using the API endpoint.
     *
     * @param in the input stream to extract text from
     * @param params additional parameters
     * @return the extracted data
     * @throws ExtractException if extraction fails
     */
    @Override

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 12.2K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

LICENSE

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Mon Jan 11 04:26:17 UTC 2021

- 11.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.util.regex.Pattern;

/**
 * Extracts text content from XML documents.
 */
public class XmlExtractor extends AbstractXmlExtractor {

    /**
     * Creates a new XmlExtractor instance.
     */
    public XmlExtractor() {
        super();
    }

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 2.6K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/client/storage/StorageClientTest.java

        try (final ResponseData responseData = storageClient.doGet("storage://fess/file1.txt")) {
            assertEquals("storage://fess/file1.txt", responseData.getUrl());
            assertEquals("text/plain", responseData.getMimeType());
            assertEquals("file1", new String(InputStreamUtil.getBytes(responseData.getResponseBody())));
            assertEquals(5, responseData.getContentLength());

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 13.8K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

 * It uses XPath expressions to extract text content from HTML documents.
 * <p>
 * This class provides methods to configure the XPath expressions, parser features, and properties.
 * It also includes a caching mechanism for XPathAPI instances to improve performance.
 * </p>
 * <p>
 * The extracted text is obtained from the nodes selected by the {@code targetNodePath} XPath expression.

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 10.3K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

                    PdfExtractor pdfExtractor = container.getComponent("pdfExtractor");
                    factory.addExtractor("text/plain", tikaExtractor);
                    factory.addExtractor("text/html", tikaExtractor);
                    factory.addExtractor("application/pdf", pdfExtractor);
                });
        pdfExtractor = container.getComponent("pdfExtractor");

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 7.6K bytes

- Viewed (0)

Search Options