Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 13 for runes (0.03 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleManagerTest.java

            List<Rule> rules = ruleManager.getRules();
            assertEquals(4, rules.size());
            assertEquals("rule1", rules.get(0).getRuleId());
            assertEquals("rule2", rules.get(1).getRuleId());
            assertEquals("rule3", rules.get(2).getRuleId());
            assertEquals("rule4", rules.get(3).getRuleId());
        }
    
        /**
         * Test adding duplicate rules
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 23.8K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java

            testRule.setRuleId("middle");
            testRule.register(1);
    
            List<Rule> rules = ruleManager.getRules();
            assertEquals(3, rules.size());
            assertEquals("rule1", rules.get(0).getRuleId());
            assertEquals("middle", rules.get(1).getRuleId());
            assertEquals("rule2", rules.get(2).getRuleId());
        }
    
        /**
         * Test register method with last index
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 21.9K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java

            List<Rule> rules = new ArrayList<>();
    
            // Create rules with unique IDs
            for (int i = 0; i < 100; i++) {
                TestRule rule = new TestRule("rule_" + i, new TestResponseProcessor("processor_" + i), true);
                rules.add(rule);
            }
    
            // Verify all IDs are unique
            for (int i = 0; i < rules.size(); i++) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 22.7K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

        /**
         * Checks if access to a given path is allowed for a specific user agent according to robots.txt rules.
         *
         * @param path The path to check for access permission
         * @param userAgent The user agent string to check against robots.txt directives
         * @return true if access is allowed, false if access is disallowed by robots.txt rules.
         *         Returns true if no matching directive is found for the user agent.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  5. README.md

    #### Content Processing Pipeline
    - **Extractors**: Content extraction from various formats
    - **Transformers**: Data transformation and enrichment
    - **Filters**: URL filtering with regex patterns
    - **Rules**: Content processing rules and validation
    
    ## Building and Testing
    
    ### Build Commands
    
    ```bash
    # Build all modules
    mvn clean install
    
    # Build without tests
    mvn clean install -DskipTests
    ```
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

        /**
         * Filter for URLs to control which URLs are crawled.
         */
        @Resource
        protected UrlFilter urlFilter;
    
        /**
         * Manager for crawling rules and configurations.
         */
        @Resource
        protected RuleManager ruleManager;
    
        /**
         * Container for managing crawler components.
         */
        @Resource
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

     *   <li>Extracting child URLs from the HTML content based on configured rules.</li>
     *   <li>Handling redirect URLs specified in the response headers.</li>
     * </ol>
     * <p>
     * The class also provides methods for configuring features and properties of the
     * underlying DOM parser, as well as defining rules for extracting child URLs
     * from specific HTML tags and attributes.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/TextUtil.java

    /**
     * Utility class for text normalization and processing.
     *
     * This class provides methods to normalize text by reading characters from a provided Reader
     * and processing them according to specific rules. The main functionality is encapsulated
     * within the nested {@link TextNormalizeContext} class.
     *
     * <p>The text normalization process includes:
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import org.xml.sax.InputSource;
    
    /**
     * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions.
     * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}.
     * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

                    return null;
                }
    
                ResultData resultData = new ResultData();
                resultData.setTransformerName(name);
                // Apply transformation rules
                try (InputStream is = responseData.getResponseBody()) {
                    byte[] bytes = is.readAllBytes();
                    String content = new String(bytes);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
Back to top