- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 26 for Burles (0.08 sec)
-
README.md
#### Content Processing Pipeline - **Extractors**: Content extraction from various formats - **Transformers**: Data transformation and enrichment - **Filters**: URL filtering with regex patterns - **Rules**: Content processing rules and validation ## Building and Testing ### Build Commands ```bash # Build all modules mvn clean install # Build without tests mvn clean install -DskipTests
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java
return null; } /** * Adds a directive to the robots.txt rules. * The user-agent pattern in the directive is converted to a regular expression pattern, * where '*' is replaced with '.*' for pattern matching, and stored case-insensitively. * * @param directive The directive to add to the robots.txt rules */ public void addDirective(final Directive directive) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java
testRule.setRuleId("middle"); testRule.register(1); List<Rule> rules = ruleManager.getRules(); assertEquals(3, rules.size()); assertEquals("rule1", rules.get(0).getRuleId()); assertEquals("middle", rules.get(1).getRuleId()); assertEquals("rule2", rules.get(2).getRuleId()); } /** * Test register method with last index */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Wed Sep 03 14:42:53 UTC 2025 - 21.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java
* <li>Extracting child URLs from the HTML content based on configured rules.</li> * <li>Handling redirect URLs specified in the response headers.</li> * </ol> * <p> * The class also provides methods for configuring features and properties of the * underlying DOM parser, as well as defining rules for extracting child URLs * from specific HTML tags and attributes. * </p> * * <p>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 28.5K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java
} catch (final ChildUrlsException e) { String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new); assertEquals(3, urls.length); assertEquals(baseUrl + "dir1/", urls[0]); assertEquals(baseUrl + "dir3/", urls[1]); assertEquals(baseUrl + "file1.txt", urls[2]); } try { smbClient.doGet(baseUrl + "dir1/");
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 13.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleManagerTest.java
List<Rule> rules = ruleManager.getRules(); assertEquals(4, rules.size()); assertEquals("rule1", rules.get(0).getRuleId()); assertEquals("rule2", rules.get(1).getRuleId()); assertEquals("rule3", rules.get(2).getRuleId()); assertEquals("rule4", rules.get(3).getRuleId()); } /** * Test adding duplicate rules */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 23.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java
*/ @Resource protected DataService<AccessResult<?>> dataService; /** * Filter for URLs to control which URLs are crawled. */ @Resource protected UrlFilter urlFilter; /** * Manager for crawling rules and configurations. */ @Resource protected RuleManager ruleManager; /** * Container for managing crawler components.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 14K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java
StatefulTransformer transformer = new StatefulTransformer("statefulTransformer"); // Process multiple URLs String[] urls = { "http://example1.com", "http://example2.com", "http://example3.com" }; for (String url : urls) { ResponseData responseData = new ResponseData(); responseData.setUrl(url);
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 28K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Wed Sep 03 14:42:53 UTC 2025 - 22.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java
this.path = path; } /** * Gets the replacement string for question marks in URLs. * * @return the question mark replacement string */ public String getQuestionStr() { return questionStr; } /** * Sets the replacement string for question marks in URLs. * * @param questionStr the question mark replacement string to set */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 11.7K bytes - Viewed (0)