- Sort Score
- Result 10 results
- Languages All
Results 1 - 3 of 3 for HtmlExtractor (0.1 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.InputSource; /** * @author shinsuke * */ public class HtmlExtractor extends AbstractXmlExtractor { protected static final Logger logger = LoggerFactory.getLogger(HtmlExtractor.class); protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 6.9K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java
public HtmlExtractor htmlExtractor; @Override protected void setUp() throws Exception { super.setUp(); StandardCrawlerContainer container = new StandardCrawlerContainer().singleton("htmlExtractor", HtmlExtractor.class); htmlExtractor = container.getComponent("htmlExtractor"); htmlExtractor.addMetadata("title", "//TITLE"); }
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 3.7K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/extractor.xml
class="org.codelibs.fess.crawler.extractor.impl.LhaExtractor" /> <component name="textExtractor" class="org.codelibs.fess.crawler.extractor.impl.TextExtractor" /> <component name="htmlExtractor" class="org.codelibs.fess.crawler.extractor.impl.HtmlExtractor"> <property name="featureMap"> <component class="java.util.LinkedHashMap"> <postConstruct name="put"> <arg>"http://xml.org/sax/features/namespaces"</arg>
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sat Aug 01 21:40:30 UTC 2020 - 49K bytes - Viewed (0)