- Sort Score
- Num 10 results
- Language All
Results 51 - 60 of 133 for extractors (0.14 seconds)
The search processing time has exceeded the limit. The displayed results may be partial.
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sun Nov 23 03:46:53 GMT 2025 - 4.7K bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java
final BufferedInputStream bis = new BufferedInputStream(in); final String encoding = htmlExtractor.getEncoding(bis); CloseableUtil.closeQuietly(bis); assertEquals("UTF-8", encoding); } public void test_getEncoding_sjis() { final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.html");
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sat Mar 15 06:52:00 GMT 2025 - 3.7K bytes - Click Count (0) -
fess-crawler/src/test/resources/extractor/json/test.json
{ "title": "Sample Document", "author": "John Doe", "version": "1.0", "published": "2025-01-15", "tags": ["crawler", "extractor", "json"], "content": { "summary": "This is a sample JSON document for testing", "body": "The extractor should handle nested objects and arrays properly" }, "metadata": { "created_at": "2025-01-01T00:00:00Z", "updated_at": "2025-01-15T12:00:00Z" }Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sun Nov 23 03:46:53 GMT 2025 - 412 bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Thu Aug 07 02:55:08 GMT 2025 - 3.7K bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sat Mar 15 06:52:00 GMT 2025 - 7.6K bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sat Mar 15 06:52:00 GMT 2025 - 2K bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/TextTransformerTest.java
import org.codelibs.fess.crawler.entity.ResponseData; import org.codelibs.fess.crawler.entity.ResultData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.extractor.impl.TikaExtractor; import org.dbflute.utflute.core.PlainTestCase; /** * @author shinsuke * */ public class TextTransformerTest extends PlainTestCase {Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Thu Aug 07 02:55:08 GMT 2025 - 4.6K bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sat Mar 15 06:52:00 GMT 2025 - 2.1K bytes - Click Count (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import org.codelibs.core.io.ResourceUtil; import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Mon Nov 24 03:59:47 GMT 2025 - 8.9K bytes - Click Count (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
import org.apache.logging.log4j.Logger; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata;
Created: Sat Dec 20 11:21:39 GMT 2025 - Last Modified: Sun Nov 23 12:19:14 GMT 2025 - 30.8K bytes - Click Count (0)