Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 41 - 50 of 64 for mhtml (0.01 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    /**
     * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions.
     * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}.
     * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            transformer.addTransformationRule("\\s+", " "); // Normalize whitespace
    
            // Simulate crawling response
            ResponseData responseData = new ResponseData();
            responseData.setUrl("http://example.com/page.html");
            responseData.setParentUrl("http://example.com/");
            responseData.setResponseBody("<html><body>  Test   Content  </body></html>".getBytes());
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  3. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    		<postConstruct name="addExtractor">
    			<arg>[
    				"application/xml",
    				"application/xhtml+xml",
    				"application/rdf+xml",
    				"application/x-freemind",
    				"text/xml"
    				]</arg>
    			<arg>xmlExtractor</arg>
    		</postConstruct>
    		<postConstruct name="addExtractor">
    			<arg>[
    				"text/html"
    				]</arg>
    			<arg>htmlExtractor</arg>
    		</postConstruct>
    		<postConstruct name="addExtractor">
    			<arg>[
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Aug 01 21:40:30 UTC 2020
    - 49K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/RegexRule.java

     * rule.addRule("url", "https://example.com/.*");
     * rule.addRule("contentType", "text/html");
     * rule.setAllRequired(true); // Both URL and content type must match
     *
     * ResponseData responseData = new ResponseData();
     * responseData.setUrl("https://example.com/page1");
     * responseData.setContentType("text/html");
     *
     * boolean matches = rule.match(responseData); // Returns true
     * }
     * </pre>
     *
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.2K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractorTest.java

                        LhaExtractor lhaExtractor = container.getComponent("lhaExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
                        factory.addExtractor("application/x-lha", lhaExtractor);
    
                    })//
            ;
    
            lhaExtractor = container.getComponent("lhaExtractor");
    
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            assertTrue(urlFilter.match("https://example.com/page.html"));
            assertTrue(urlFilter.match("https://example.com/document.pdf"));
            assertFalse(urlFilter.match("https://example.com/image.jpg"));
            assertFalse(urlFilter.match("https://example.com/admin/dashboard"));
            assertFalse(urlFilter.match("https://other.com/page.html"));
        }
    
        /**
         * Test match with no patterns configured
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  7. fess-crawler/src/test/resources/extractor/tar/test.tar

    data/folder/file.html テスト data/file.txt テキスト...
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 10K bytes
    - Viewed (0)
  8. fess-crawler/src/test/resources/extractor/gz/test.tar.gz

    test.tar data/folder/file.html テスト data/file.txt テキスト...
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 351 bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractorTest.java

                        ZipExtractor zipExtractor = container.getComponent("zipExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
                        factory.addExtractor("application/zip", zipExtractor);
    
                    })//
            ;
    
            zipExtractor = container.getComponent("zipExtractor");
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 3.7K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java

                        TarExtractor tarExtractor = container.getComponent("tarExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
                        factory.addExtractor("application/tar", tarExtractor);
                    })//
            ;
    
            tarExtractor = container.getComponent("tarExtractor");
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 3.7K bytes
    - Viewed (0)
Back to top