- Sort Score
- Result 10 results
- Languages All
Results 21 - 30 of 363 for extractor (0.04 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java
super(); } /** * Extracts text from the Word input stream. * @param in The input stream. * @param params The parameters. * @return The extracted data. */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { validateInputStream(in);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Wed Nov 19 08:55:01 UTC 2025 - 1.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java
* @param outExt the output file extension * @return the extracted text content * @throws ExtractException if an error occurs while reading the file */ protected String getOutputContent(final File outputFile, final String outExt) { final Extractor extractor = getExtractor(outExt); if (extractor != null) { final Map<String, String> params = new HashMap<>();
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 10.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java
// InputStream in = // ResourceUtil.getResourceAsStream("extractor/test_pass.pdf"); // TikaExtractor extractor = // (TikaExtractor) getContainer().getComponent( // "tikaExtractorForPdfPassword"); // Map<String, String> params = new HashMap<String, String>(); // params.put(ExtractData.URL, "http://example.com/test_pass.pdf"); // ExtractData extractData = extractor.getText(in, params);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 30.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java
import org.codelibs.fess.crawler.exception.MaxLengthExceededException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jakarta.annotation.Resource; /** * Extracts text content from TAR archives. */ public class TarExtractor extends AbstractExtractor {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java
import org.codelibs.fess.crawler.exception.MaxLengthExceededException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jakarta.annotation.Resource; /** * Extracts text content from ZIP archives. */ public class ZipExtractor extends AbstractExtractor {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 4.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 1.9K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractorTest.java
private TestApiExtractorServer server; private ApiExtractor extractor; @Override protected void setUp() throws Exception { super.setUp(); server = new TestApiExtractorServer(port); server.start(); extractor = new ApiExtractor(); extractor.setUrl("http://127.0.0.1:" + port + "/"); extractor.init(); } @Override
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 5.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractorTest.java
final InputStream in = new ByteArrayInputStream("test".getBytes()); extractor.resetTestState(); assertFalse("Should start with validate not called", extractor.isValidateCalled()); extractor.getText(in, null); assertTrue("Should be called after getText", extractor.isValidateCalled()); } /** * Test validateInputStream with edge case: empty stream.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 8.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/EXTRACTOR_TESTS_README.md
# Extractor Implementation Tests This directory contains comprehensive tests for the Extractor implementations, focusing on the improvements made to resource management, error handling, and input validation. ## Test Files Overview ### 1. ExtractorResourceManagementTest.java **Purpose**: Verify proper resource management in Extractor implementations. **Key Test Areas**: - Resource closure on successful extraction (MS Office extractors)
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Wed Nov 19 08:55:01 UTC 2025 - 5.7K bytes - Viewed (0)