Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 19 for mhtml (0.04 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java

            assertContentType("text/plain", null, "hoge.txt");
            assertContentType("text/html", "html/test1.html", "hoge.html");
            assertContentType("text/html", "html/test1.html", "hoge.htm");
            assertContentType("text/html", "html/test1.shtml", "hoge.shtml");
    
            assertContentType("application/msword", "test/text1.txt", "hoge.doc");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 11.6K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

        /**
         * Checks if the response data represents HTML content.
         *
         * @param responseData the response data to check
         * @return true if the content is HTML, false otherwise
         */
        protected boolean isHtml(final ResponseData responseData) {
            final String mimeType = responseData.getMimeType();
            if ("text/html".equals(mimeType) || "application/xhtml+xml".equals(mimeType)) {
                return true;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformerTest.java

            assertEquals("http://hoge/index.html", htmlTransformer.normalizeUrl(url));
    
            url = "http://hoge/index.html;jsessionid=hoge?a=1";
            assertEquals("http://hoge/index.html?a=1", htmlTransformer.normalizeUrl(url));
    
            url = "http://hoge/index.html;jsessionid=hoge.fuga?a=1";
            assertEquals("http://hoge/index.html?a=1", htmlTransformer.normalizeUrl(url));
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 13.8K bytes
    - Viewed (0)
  4. README.md

    - Image metadata (EXIF, IPTC, XMP)
    
    #### Archives and Compressed Files
    - ZIP, TAR, GZ archives
    - LHA compression format
    - Nested archive extraction
    
    #### Web and Markup
    - HTML, XHTML with XPath support
    - XML documents
    - JSON and structured data
    
    #### Media Files
    - Audio formats (MP3, WAV, FLAC)
    - Video formats (MP4, AVI, MOV)
    - Metadata extraction from media files
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

     * The default value for {@code targetNodePath} is "//HTML/BODY | //@alt | //@title", which selects the body of the HTML document,
     * as well as the alt and title attributes.
     * </p>
     * <p>
     * The class uses {@link DOMParser} to parse HTML documents and {@link XPathAPI} to execute XPath queries.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java

            rule.addCondition("mimeType", "text/html");
            rule.addCondition("statusCode", "200");
    
            // All conditions match
            ResponseData responseData1 = new ResponseData();
            responseData1.setUrl("https://www.example.com/page");
            responseData1.setMimeType("text/html");
            responseData1.setHttpStatusCode(200);
            assertTrue(rule.match(responseData1));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 22.7K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java

            assertEquals("http://localhost/login.html", HcHttpClient.constructRedirectLocation("http://localhost/", "/login.html"));
            assertEquals("http://localhost/path/login.html", HcHttpClient.constructRedirectLocation("http://localhost/path/", "login.html"));
            assertEquals("http://localhost/login.html", HcHttpClient.constructRedirectLocation("http://localhost/path/", "/login.html"));
            assertEquals("https://example.com/newpage",
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    /**
     * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions.
     * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}.
     * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            transformer.addTransformationRule("\\s+", " "); // Normalize whitespace
    
            // Simulate crawling response
            ResponseData responseData = new ResponseData();
            responseData.setUrl("http://example.com/page.html");
            responseData.setParentUrl("http://example.com/");
            responseData.setResponseBody("<html><body>  Test   Content  </body></html>".getBytes());
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            assertTrue(urlFilter.match("https://example.com/page.html"));
            assertTrue(urlFilter.match("https://example.com/document.pdf"));
            assertFalse(urlFilter.match("https://example.com/image.jpg"));
            assertFalse(urlFilter.match("https://example.com/admin/dashboard"));
            assertFalse(urlFilter.match("https://other.com/page.html"));
        }
    
        /**
         * Test match with no patterns configured
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
Back to top