- Sort Score
- Result 10 results
- Languages All
Results 51 - 60 of 363 for extractor (0.04 sec)
-
src/main/java/org/codelibs/fess/helper/DocumentHelper.java
import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; import org.codelibs.fess.crawler.processor.ResponseProcessor; import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor; import org.codelibs.fess.crawler.rule.Rule;
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Nov 28 16:29:12 UTC 2025 - 17.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content and metadata from CSV files. * This extractor provides better structured data extraction compared to Tika's generic text extraction. * * <p>Features: * <ul> * <li>Automatic delimiter detection (comma, tab, semicolon, pipe)</li> * <li>Header row detection and extraction</li> * <li>Column name to data value association</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.Map; import org.codelibs.core.io.InputStreamUtil; import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content from an input stream as plain text. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.ResourceUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 4.6K bytes - Viewed (1) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java
assertTrue("Error message should indicate extraction failure", e.getMessage().contains("Failed to extract")); } finally { // Reset to default encoding textExtractor.setEncoding("UTF-8"); } } /** * Test extraction with empty input stream. */ public void test_getText_emptyInputStream_returnsEmptyContent() {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 8.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java
* It provides functionality to manage passwords for different file patterns using regular expressions. * * <p>The extractor supports two types of password management: * <ul> * <li>Static passwords configured via {@link #addPassword(String, String)}</li> * <li>Dynamic passwords provided through extraction parameters</li> * </ul> *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java
final BufferedInputStream bis = new BufferedInputStream(in); final String encoding = htmlExtractor.getEncoding(bis); CloseableUtil.closeQuietly(bis); assertEquals("UTF-8", encoding); } public void test_getEncoding_sjis() { final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.html");
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java
import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl; import org.dbflute.utflute.core.PlainTestCase; /** * Test class for archive extractor error handling improvements. * Tests partial extraction, error recovery, and improved error messages. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/TextTransformerTest.java
import org.codelibs.fess.crawler.entity.ResponseData; import org.codelibs.fess.crawler.entity.ResultData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; import org.dbflute.utflute.core.PlainTestCase; /** * @author shinsuke * */ public class TextTransformerTest extends PlainTestCase {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 4.6K bytes - Viewed (0)