Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 3 of 3 for HtmlXpathExtractor (0.15 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;
    
    /**
     * @author shinsuke
     *
     */
    public class HtmlXpathExtractor extends AbstractXmlExtractor {
        protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
                Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 7K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractorTest.java

        public HtmlXpathExtractor htmlXpathExtractor;
    
        /**
         * Prepares the extractor under test: after the parent set-up runs, a
         * {@code StandardCrawlerContainer} is built with {@code HtmlXpathExtractor}
         * registered through {@code singleton(...)}, the component is looked up by
         * the same name, stored in {@link #htmlXpathExtractor}, and {@code init()} is called on it.
         *
         * @throws Exception if the parent set-up or the component preparation fails
         */
        @Override
        protected void setUp() throws Exception {
            super.setUp();

            // One name is used for both registration and lookup.
            final String componentName = "htmlXpathExtractor";
            final StandardCrawlerContainer crawlerContainer =
                    new StandardCrawlerContainer().singleton(componentName, HtmlXpathExtractor.class);

            htmlXpathExtractor = crawlerContainer.getComponent(componentName);
            htmlXpathExtractor.init();
        }
    
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 4.1K bytes
    - Viewed (0)
  3. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    			<arg>"//TITLE"</arg>
    		</postConstruct>
    	</component>
    	<!-- Declares the component named xmlExtractor, implemented by XmlExtractor;
    	     no nested configuration is given here. -->
    	<component name="xmlExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.XmlExtractor" />
    	<!-- Declares the component named htmlXpathExtractor, implemented by HtmlXpathExtractor.
    	     The postConstruct entry names addFeature and supplies two arguments: the SAX
    	     namespaces feature URI and the string "false".
    	     NOTE(review): presumably this invokes addFeature after construction to turn
    	     namespace processing off in the underlying parser; confirm against the
    	     container's postConstruct semantics and the addFeature implementation. -->
    	<component name="htmlXpathExtractor"
    		class="org.codelibs.fess.crawler.extractor.impl.HtmlXpathExtractor">
    		<postConstruct name="addFeature">
    			<arg>"http://xml.org/sax/features/namespaces"</arg>
    			<arg>"false"</arg>
    		</postConstruct>
    	</component>
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Sat Aug 01 21:40:30 UTC 2020
    - 49K bytes
    - Viewed (0)
Back to top