Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 2 of 2 for htmlExtractor (0.2 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    /**
     * @author shinsuke
     *
     */
    public class HtmlExtractor extends AbstractXmlExtractor {
        protected static final Logger logger = LoggerFactory.getLogger(HtmlExtractor.class);
    
        protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 6.9K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java

        public HtmlExtractor htmlExtractor;
    
        @Override
        protected void setUp() throws Exception {
            super.setUp();
            StandardCrawlerContainer container = new StandardCrawlerContainer().singleton("htmlExtractor", HtmlExtractor.class);
            htmlExtractor = container.getComponent("htmlExtractor");
            htmlExtractor.addMetadata("title", "//TITLE");
        }
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 3.7K bytes
    - Viewed (0)
Back to top