Search Options

Results per page
Sort
Preferred Languages
Advance

Results 21 - 30 of 60 for Extractor (0.09 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractorTest.java

            final String content = htmlXpathExtractor.getText(in, null).getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("ใƒ†ใ‚นใƒˆ"));
        }
    
        public void test_getHtml_sjis() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.html");
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 4.1K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hdgf.extractor.VisioTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 3.3K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.impl.TikaExtractor;
    import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
    import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
    import org.codelibs.fess.crawler.util.FieldConfigs;
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Fri Oct 11 21:11:58 UTC 2024
    - 23.6K bytes
    - Viewed (0)
  5. build-logic/documentation/src/main/groovy/gradlebuild/docs/dsl/source/ClassMetaDataUtil.java

    import java.io.File;
    import java.util.HashSet;
    import java.util.Set;
    
    class ClassMetaDataUtil {
        static void extractFromMetadata(File metaData, Set<String> excludedPackagePatterns, Action<ClassMetaData> extractor) {
            SimpleClassMetaDataRepository<ClassMetaData> repository = new SimpleClassMetaDataRepository<>();
            repository.load(metaData);
    
            final Set<String> excludedPrefixes = new HashSet<>();
    Registered: Wed Nov 06 11:36:14 UTC 2024
    - Last Modified: Wed Dec 09 08:14:05 UTC 2020
    - 2.4K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 7.6K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

                            }
                            return new XPathAPI();
                        }
                    });
        }
    
        /*
         * (non-Javadoc)
         *
         * @see org.codelibs.fess.crawler.extractor.Extractor#getText(java.io.InputStream,
         * java.util.Map)
         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            if (in == null) {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 7K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java

            final Extractor extractor = new Extractor() {
                public ExtractData getText(final InputStream in, final Map<String, String> params) {
                    return null;
                }
            };
    
            assertNull(extractorFactory.getExtractor("test"));
            extractorFactory.addExtractor("test", extractor);
            assertEquals(extractor, extractorFactory.getExtractor("test"));
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Tue Jun 18 05:49:13 UTC 2024
    - 6.5K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

                extractor = ComponentUtil.getComponent("tikaExtractor");
                if (extractor == null) {
                    throw new FessSystemException("Could not find tikaExtractor.");
                }
            }
    
            if (logger.isDebugEnabled()) {
                logger.debug("url={}, extractor={}", responseData.getUrl(), extractor);
            }
            return extractor;
        }
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Thu Feb 22 01:53:18 UTC 2024
    - 2.4K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/helper/DocumentHelper.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.impl.TikaExtractor;
    import org.codelibs.fess.crawler.processor.ResponseProcessor;
    import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
    import org.codelibs.fess.crawler.rule.Rule;
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Thu Feb 22 01:53:18 UTC 2024
    - 12K bytes
    - Viewed (0)
Back to top