- Sort Score
- Result 10 results
- Languages All
Results 1 - 2 of 2 for htmlExtractor (0.2 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.InputSource; /** * @author shinsuke * */ public class HtmlExtractor extends AbstractXmlExtractor { protected static final Logger logger = LoggerFactory.getLogger(HtmlExtractor.class); protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 6.9K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java
public HtmlExtractor htmlExtractor; @Override protected void setUp() throws Exception { super.setUp(); StandardCrawlerContainer container = new StandardCrawlerContainer().singleton("htmlExtractor", HtmlExtractor.class); htmlExtractor = container.getComponent("htmlExtractor"); htmlExtractor.addMetadata("title", "//TITLE"); }
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 3.7K bytes - Viewed (0)