Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 91 - 100 of 103 for etext (0.02 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

     * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
     * and adds them as child URLs to be crawled.
     *
     * <p>
     * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
     * It then iterates through the sitemaps in the SitemapSet, extracts the URL
     * from each sitemap, and creates a new {@link RequestData} object for each URL.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

     * <pre>
     * XmlTransformer transformer = new XmlTransformer();
     * transformer.setNamespaceAware(true);
     * transformer.setCacheDuration(30);
     * transformer.addFieldRule("title", "/book/title/text()");
     * transformer.addFieldRule("author", "/book/author/name/text()");
     *
     * ResponseData responseData = new ResponseData();
     * responseData.setResponseBody(new ByteArrayInputStream(xmlContent.getBytes(StandardCharsets.UTF_8)));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java

            conditionalRule.setUrlPattern("https?://.*\\.example\\.com/.*");
            conditionalRule.setMimeTypePattern("text/.*");
    
            // Test matching
            ResponseData responseData1 = new ResponseData();
            responseData1.setUrl("http://www.example.com/page");
            responseData1.setMimeType("text/html");
            assertTrue(conditionalRule.match(responseData1));
    
            // Test non-matching URL
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 21.9K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            responseData.setUrl("http://example.com");
            responseData.setResponseBody("This is old text with 123 numbers".getBytes());
    
            ResultData resultData = transformer.transform(responseData);
    
            assertNotNull(resultData);
            assertEquals("contentTransformer", resultData.getTransformerName());
            assertEquals("This is new text with NUMBER numbers", new String(resultData.getData()));
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExecutionTimeoutException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content by executing an external command.
     */
    public class CommandExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(CommandExtractor.class);
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 16K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

     * </p>
     *
     * <p>
     * Example usage:
     * </p>
     *
     * <pre>
     * XpathTransformer transformer = new XpathTransformer();
     * transformer.addFieldRule("title", "//title/text()");
     * transformer.addFieldRule("body", "//body/p/text()");
     *
     * ResponseData responseData = new ResponseData();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                        TikaExtractor tikaExtractor = container.getComponent("tikaExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
                    })//
                    .singleton("httpClient", HcHttpClient.class)//
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 19.1K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    /**
     * Extracts text content from HTML documents.
     */
    public class HtmlExtractor extends AbstractXmlExtractor {
        /** Logger for this class. */
        protected static final Logger logger = LogManager.getLogger(HtmlExtractor.class);
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler;
    
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.lang.StringUtil;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

    import java.net.NoRouteToHostException;
    import java.net.SocketException;
    import java.net.URI;
    import java.net.URISyntaxException;
    import java.net.UnknownHostException;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Locale;
    import java.util.Map;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
Back to top