Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 51 - 60 of 133 for extractors (0.14 seconds)

The search processing time has exceeded the limit. The displayed results may be partial.

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sun Nov 23 03:46:53 GMT 2025
    - 4.7K bytes
    - Click Count (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java

            final BufferedInputStream bis = new BufferedInputStream(in);
            final String encoding = htmlExtractor.getEncoding(bis);
            CloseableUtil.closeQuietly(bis);
            assertEquals("UTF-8", encoding);
        }
    
        public void test_getEncoding_sjis() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.html");
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sat Mar 15 06:52:00 GMT 2025
    - 3.7K bytes
    - Click Count (0)
  3. fess-crawler/src/test/resources/extractor/json/test.json

    {
      "title": "Sample Document",
      "author": "John Doe",
      "version": "1.0",
      "published": "2025-01-15",
      "tags": ["crawler", "extractor", "json"],
      "content": {
        "summary": "This is a sample JSON document for testing",
        "body": "The extractor should handle nested objects and arrays properly"
      },
      "metadata": {
        "created_at": "2025-01-01T00:00:00Z",
        "updated_at": "2025-01-15T12:00:00Z"
      }
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sun Nov 23 03:46:53 GMT 2025
    - 412 bytes
    - Click Count (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;
    import org.apache.commons.compress.compressors.CompressorStreamFactory;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Thu Aug 07 02:55:08 GMT 2025
    - 3.7K bytes
    - Click Count (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sat Mar 15 06:52:00 GMT 2025
    - 7.6K bytes
    - Click Count (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sat Mar 15 06:52:00 GMT 2025
    - 2K bytes
    - Click Count (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/TextTransformerTest.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.entity.ResultData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.extractor.impl.TikaExtractor;
    import org.dbflute.utflute.core.PlainTestCase;
    
    /**
     * @author shinsuke
     *
     */
    public class TextTransformerTest extends PlainTestCase {
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Thu Aug 07 02:55:08 GMT 2025
    - 4.6K bytes
    - Click Count (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sat Mar 15 06:52:00 GMT 2025
    - 2.1K bytes
    - Click Count (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    
    import org.codelibs.core.io.ResourceUtil;
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Mon Nov 24 03:59:47 GMT 2025
    - 8.9K bytes
    - Click Count (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

    import org.apache.logging.log4j.Logger;
    import org.apache.tika.config.TikaConfig;
    import org.apache.tika.detect.Detector;
    import org.apache.tika.exception.TikaException;
    import org.apache.tika.extractor.EmbeddedDocumentExtractor;
    import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
    import org.apache.tika.io.TemporaryResources;
    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sun Nov 23 12:19:14 GMT 2025
    - 30.8K bytes
    - Click Count (0)
Back to Top