Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 41 - 50 of 60 for Extractor (0.05 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/TextTransformerTest.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.entity.ResultData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.extractor.impl.TikaExtractor;
    import org.dbflute.utflute.core.PlainTestCase;
    
    /**
     * @author shinsuke
     *
     */
    public class TextTransformerTest extends PlainTestCase {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 4.6K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;
    import org.apache.commons.compress.compressors.CompressorStreamFactory;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 3.6K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

        /*
         * (non-Javadoc)
         *
         * @see
         * org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getEncodingPattern()
         */
        @Override
        protected Pattern getEncodingPattern() {
            return metaCharsetPattern;
        }
    
        /*
         * (non-Javadoc)
         *
         * @see org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getTagPattern()
         */
        @Override
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 6.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

        protected int maxOutputLine = 1000;
    
        protected boolean standardOutput = false;
    
        /*
         * (non-Javadoc)
         *
         * @see org.codelibs.fess.crawler.extractor.Extractor#getText(java.io.InputStream,
         * java.util.Map)
         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 13.6K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 2K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.Map;
    
    import org.codelibs.core.io.InputStreamUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.7K bytes
    - Viewed (0)
  7. fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml

        <include path="crawler/client.xml"/>
        <include path="crawler/rule.xml"/>
        <include path="crawler/filter.xml"/>
        <include path="crawler/interval.xml"/>
        <include path="crawler/extractor.xml"/>
        <include path="crawler/mimetype.xml"/>
        <include path="crawler/encoding.xml"/>
        <include path="crawler/urlconverter.xml"/>
        <include path="crawler/log.xml"/>
        <include path="crawler/sitemaps.xml"/>
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Nov 07 04:44:10 UTC 2024
    - 2.2K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.Map;
    
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.5K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

    import org.apache.commons.lang3.SystemUtils;
    import org.apache.tika.config.TikaConfig;
    import org.apache.tika.detect.Detector;
    import org.apache.tika.exception.TikaException;
    import org.apache.tika.extractor.EmbeddedDocumentExtractor;
    import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
    import org.apache.tika.io.TemporaryResources;
    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Sat Oct 12 01:41:37 UTC 2024
    - 25K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.util.regex.Pattern;
    
    /**
     * @author shinsuke
     *
     */
    public class XmlExtractor extends AbstractXmlExtractor {
        protected Pattern xmlEncodingPattern =
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.6K bytes
    - Viewed (0)
Back to top