- Sort Score
- Result 10 results
- Languages All
Results 91 - 100 of 103 for etext (0.01 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java
* It parses the response body as a SitemapSet, extracts URLs from the sitemaps, * and adds them as child URLs to be crawled. * * <p> * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text. * It then iterates through the sitemaps in the SitemapSet, extracts the URL * from each sitemap, and creates a new {@link RequestData} object for each URL.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java
* <pre> * XmlTransformer transformer = new XmlTransformer(); * transformer.setNamespaceAware(true); * transformer.setCacheDuration(30); * transformer.addFieldRule("title", "/book/title/text()"); * transformer.addFieldRule("author", "/book/author/name/text()"); * * ResponseData responseData = new ResponseData(); * responseData.setResponseBody(new ByteArrayInputStream(xmlContent.getBytes(StandardCharsets.UTF_8)));
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 23.9K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java
conditionalRule.setUrlPattern("https?://.*\\.example\\.com/.*"); conditionalRule.setMimeTypePattern("text/.*"); // Test matching ResponseData responseData1 = new ResponseData(); responseData1.setUrl("http://www.example.com/page"); responseData1.setMimeType("text/html"); assertTrue(conditionalRule.match(responseData1)); // Test non-matching URL
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Wed Sep 03 14:42:53 UTC 2025 - 21.9K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java
responseData.setUrl("http://example.com"); responseData.setResponseBody("This is old text with 123 numbers".getBytes()); ResultData resultData = transformer.transform(responseData); assertNotNull(resultData); assertEquals("contentTransformer", resultData.getTransformerName()); assertEquals("This is new text with NUMBER numbers", new String(resultData.getData()));
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 28K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExecutionTimeoutException; import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content by executing an external command. */ public class CommandExtractor extends AbstractExtractor { private static final Logger logger = LogManager.getLogger(CommandExtractor.class);
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 16K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java
* </p> * * <p> * Example usage: * </p> * * <pre> * XpathTransformer transformer = new XpathTransformer(); * transformer.addFieldRule("title", "//title/text()"); * transformer.addFieldRule("body", "//body/p/text()"); * * ResponseData responseData = new ResponseData();
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 13.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
TikaExtractor tikaExtractor = container.getComponent("tikaExtractor"); factory.addExtractor("text/plain", tikaExtractor); factory.addExtractor("text/html", tikaExtractor); })// .singleton("httpClient", HcHttpClient.class)//
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 19.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
import org.codelibs.fess.crawler.util.XPathAPI; import org.codelibs.nekohtml.parsers.DOMParser; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.InputSource; /** * Extracts text content from HTML documents. */ public class HtmlExtractor extends AbstractXmlExtractor { /** Logger for this class. */ protected static final Logger logger = LogManager.getLogger(HtmlExtractor.class);
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 9.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.lang.StringUtil;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
import java.net.NoRouteToHostException; import java.net.SocketException; import java.net.URI; import java.net.URISyntaxException; import java.net.UnknownHostException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 52.2K bytes - Viewed (0)