haml - Code Search

fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/TextTransformerTest.java

        final byte[] data = new String("<html><body>xyz</body></html>").getBytes();
        final ResponseData responseData = new ResponseData();
        responseData.setUrl("file:/test.html");
        responseData.setCharSet(Constants.UTF_8);
        responseData.setResponseBody(data);
        responseData.setMimeType("text/html");
        final ResultData resultData = textTransformer.transform(responseData);

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 4.6K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

 * </p>
 * <p>
 * The transformation process involves:
 * </p>
 * <ol>
 *   <li>Determining the character set encoding of the HTML content.</li>
 *   <li>Storing the HTML content as data in the {@link ResultData}.</li>
 *   <li>Extracting child URLs from the HTML content based on configured rules.</li>
 *   <li>Handling redirect URLs specified in the response headers.</li>
 * </ol>
 * <p>

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 28.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java

        rule.addCondition("mimeType", "text/html");
        rule.addCondition("statusCode", "200");

        // All conditions match
        ResponseData responseData1 = new ResponseData();
        responseData1.setUrl("https://www.example.com/page");
        responseData1.setMimeType("text/html");
        responseData1.setHttpStatusCode(200);
        assertTrue(rule.match(responseData1));

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Wed Sep 03 14:42:53 UTC 2025

- 22.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java

        assertEquals("http://localhost/login.html", HcHttpClient.constructRedirectLocation("http://localhost/", "/login.html"));
        assertEquals("http://localhost/path/login.html", HcHttpClient.constructRedirectLocation("http://localhost/path/", "login.html"));
        assertEquals("http://localhost/login.html", HcHttpClient.constructRedirectLocation("http://localhost/path/", "/login.html"));
        assertEquals("https://example.com/newpage",

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Sep 06 04:15:37 UTC 2025

- 11.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

 * The default value for {@code targetNodePath} is "//HTML/BODY | //@alt | //@title", which selects the body of the HTML document,
 * as well as the alt and title attributes.
 * </p>
 * <p>
 * The class uses {@link DOMParser} to parse HTML documents and {@link XPathAPI} to execute XPath queries.

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 10.3K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

        transformer.addTransformationRule("\\s+", " "); // Normalize whitespace

        // Simulate crawling response
        ResponseData responseData = new ResponseData();
        responseData.setUrl("http://example.com/page.html");
        responseData.setParentUrl("http://example.com/");
        responseData.setResponseBody("<html><body>  Test   Content  </body></html>".getBytes());

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Sep 06 04:15:37 UTC 2025

- 28K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

    }

    /**
     * Returns the currently configured pattern for matching HTML tags.
     *
     * @return the HTML tag pattern
     */
    public Pattern getHtmlTagPattern() {
        return this.htmlTagPattern;
    }

    /**
     * Sets the pattern used for matching HTML tags.
     *
     * @param htmlTagPattern the HTML tag pattern to set
     */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 9.3K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/RegexRule.java

 * rule.addRule("url", "https://example.com/.*");
 * rule.addRule("contentType", "text/html");
 * rule.setAllRequired(true); // Both URL and content type must match
 *
 * ResponseData responseData = new ResponseData();
 * responseData.setUrl("https://example.com/page1");
 * responseData.setContentType("text/html");
 *
 * boolean matches = rule.match(responseData); // Returns true
 * }
 * </pre>
 *
 */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 6.2K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

        assertTrue(urlFilter.match("https://example.com/page.html"));
        assertTrue(urlFilter.match("https://example.com/document.pdf"));
        assertFalse(urlFilter.match("https://example.com/image.jpg"));
        assertFalse(urlFilter.match("https://example.com/admin/dashboard"));
        assertFalse(urlFilter.match("https://other.com/page.html"));
    }

    /**
     * Test match with no patterns configured

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Wed Sep 03 14:42:53 UTC 2025

- 19K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractorTest.java

                    LhaExtractor lhaExtractor = container.getComponent("lhaExtractor");
                    factory.addExtractor("text/plain", tikaExtractor);
                    factory.addExtractor("text/html", tikaExtractor);
                    factory.addExtractor("application/x-lha", lhaExtractor);

                })//
        ;

        lhaExtractor = container.getComponent("lhaExtractor");

    }

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 3.4K bytes

- Viewed (0)

Search Options