Search Options

Results per page
Sort
Preferred Languages
Advance

Results 41 - 50 of 363 for extractor (0.04 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractorTest.java

        public void test_getText() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/csv/test.csv");
            final ExtractData extractData = csvExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            final String content = extractData.getContent();
            logger.info(content);
    
            // Verify header extraction
            assertTrue(content.contains("Name"));
            assertTrue(content.contains("Email"));
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 5.3K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractorTest.java

        public void test_getText_nested() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/json/test.json");
            final ExtractData extractData = jsonExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            final String content = extractData.getContent();
    
            // Verify nested content extraction
            assertTrue(content.contains("content.summary"));
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 4.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

    /**
     * Extracts text content and metadata from Markdown files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>YAML front matter metadata extraction</li>
     *   <li>Heading structure extraction</li>
     *   <li>Link URL extraction</li>
     *   <li>Code block content extraction</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java

    import com.fasterxml.jackson.databind.node.ObjectNode;
    
    /**
     * Extracts text content and metadata from JSON files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>Structured text extraction with key-value pairs</li>
     *   <li>Top-level field extraction as metadata</li>
     *   <li>Nested structure flattening with configurable depth</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 9.7K bytes
    - Viewed (0)
  6. fess-crawler/src/test/resources/extractor/json/test.json

    {
      "title": "Sample Document",
      "author": "John Doe",
      "version": "1.0",
      "published": "2025-01-15",
      "tags": ["crawler", "extractor", "json"],
      "content": {
        "summary": "This is a sample JSON document for testing",
        "body": "The extractor should handle nested objects and arrays properly"
      },
      "metadata": {
        "created_at": "2025-01-01T00:00:00Z",
        "updated_at": "2025-01-15T12:00:00Z"
      }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 412 bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 2K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 7.6K bytes
    - Viewed (0)
Back to top