Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 5 of 5 for InputSource (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/XmlUtil.java

         */
        public static Map<String, Object> getDataMap(final AccessResultData<?> accessResultData) {
            // create input source
            final InputSource is = new InputSource(new ByteArrayInputStream(accessResultData.getData()));
            if (StringUtil.isNotBlank(accessResultData.getEncoding())) {
                is.setEncoding(accessResultData.getEncoding());
            }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.4K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

                final String enc = getEncoding(bis);
    
                final DOMParser parser = getDomParser();
                final InputSource inputSource = new InputSource(bis);
                inputSource.setEncoding(enc);
                parser.parse(inputSource);
                final Document document = parser.getDocument();
    
                final StringBuilder buf = new StringBuilder(255);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

            final DOMParser parser = getDomParser();
            try (final InputStream in = responseData.getResponseBody()) {
                final InputSource is = new InputSource(in);
                if (responseData.getCharSet() != null) {
                    is.setEncoding(responseData.getCharSet());
                }
                parser.parse(is);
            } catch (final Exception e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    /**
     * Extracts text content from HTML documents.
     */
    public class HtmlExtractor extends AbstractXmlExtractor {
        /** Logger for this class. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

    import org.codelibs.fess.crawler.util.CharUtil;
    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    import jakarta.annotation.Resource;
    
    /**
     * The {@code HtmlTransformer} class is responsible for transforming HTML responses
     * during the crawling process. It extracts data, identifies child URLs, and handles
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
Back to top