- Sort Score
- Result 10 results
- Languages All
Results 41 - 50 of 363 for extractor (0.07 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractorTest.java
public void test_getText() { final InputStream in = ResourceUtil.getResourceAsStream("extractor/csv/test.csv"); final ExtractData extractData = csvExtractor.getText(in, null); CloseableUtil.closeQuietly(in); final String content = extractData.getContent(); logger.info(content); // Verify header extraction assertTrue(content.contains("Name")); assertTrue(content.contains("Email"));
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 5.3K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractorTest.java
public void test_getText_nested() { final InputStream in = ResourceUtil.getResourceAsStream("extractor/json/test.json"); final ExtractData extractData = jsonExtractor.getText(in, null); CloseableUtil.closeQuietly(in); final String content = extractData.getContent(); // Verify nested content extraction assertTrue(content.contains("content.summary"));Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 4.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
/** * Extracts text content and metadata from Markdown files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul> * <li>YAML front matter metadata extraction</li> * <li>Heading structure extraction</li> * <li>Link URL extraction</li> * <li>Code block content extraction</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
import com.fasterxml.jackson.databind.node.ObjectNode; /** * Extracts text content and metadata from JSON files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul> * <li>Structured text extraction with key-value pairs</li> * <li>Top-level field extraction as metadata</li> * <li>Nested structure flattening with configurable depth</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
fess-crawler/src/test/resources/extractor/json/test.json
{ "title": "Sample Document", "author": "John Doe", "version": "1.0", "published": "2025-01-15", "tags": ["crawler", "extractor", "json"], "content": { "summary": "This is a sample JSON document for testing", "body": "The extractor should handle nested objects and arrays properly" }, "metadata": { "created_at": "2025-01-01T00:00:00Z", "updated_at": "2025-01-15T12:00:00Z" }Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 412 bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 2.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 2.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 7.6K bytes - Viewed (0)