Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 26 for Burles (0.07 sec)

  1. README.md

    #### Content Processing Pipeline
    - **Extractors**: Content extraction from various formats
    - **Transformers**: Data transformation and enrichment
    - **Filters**: URL filtering with regex patterns
    - **Rules**: Content processing rules and validation
    
    ## Building and Testing
    
    ### Build Commands
    
    ```bash
    # Build all modules
    mvn clean install
    
    # Build without tests
    mvn clean install -DskipTests
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

            return null;
        }
    
        /**
         * Adds a directive to the robots.txt rules.
         * The user-agent pattern in the directive is converted to a regular expression pattern,
         * where '*' is replaced with '.*' for pattern matching, and stored case-insensitively.
         *
         * @param directive The directive to add to the robots.txt rules
         */
        public void addDirective(final Directive directive) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java

            testRule.setRuleId("middle");
            testRule.register(1);
    
            List<Rule> rules = ruleManager.getRules();
            assertEquals(3, rules.size());
            assertEquals("rule1", rules.get(0).getRuleId());
            assertEquals("middle", rules.get(1).getRuleId());
            assertEquals("rule2", rules.get(2).getRuleId());
        }
    
        /**
         * Test register method with last index
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 21.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

     *   <li>Extracting child URLs from the HTML content based on configured rules.</li>
     *   <li>Handling redirect URLs specified in the response headers.</li>
     * </ol>
     * <p>
     * The class also provides methods for configuring features and properties of the
     * underlying DOM parser, as well as defining rules for extracting child URLs
     * from specific HTML tags and attributes.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java

            } catch (final ChildUrlsException e) {
                String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new);
                assertEquals(3, urls.length);
                assertEquals(baseUrl + "dir1/", urls[0]);
                assertEquals(baseUrl + "dir3/", urls[1]);
                assertEquals(baseUrl + "file1.txt", urls[2]);
            }
            try {
                smbClient.doGet(baseUrl + "dir1/");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 13.7K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleManagerTest.java

            List<Rule> rules = ruleManager.getRules();
            assertEquals(4, rules.size());
            assertEquals("rule1", rules.get(0).getRuleId());
            assertEquals("rule2", rules.get(1).getRuleId());
            assertEquals("rule3", rules.get(2).getRuleId());
            assertEquals("rule4", rules.get(3).getRuleId());
        }
    
        /**
         * Test adding duplicate rules
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 23.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

         */
        @Resource
        protected DataService<AccessResult<?>> dataService;
    
        /**
         * Filter for URLs to control which URLs are crawled.
         */
        @Resource
        protected UrlFilter urlFilter;
    
        /**
         * Manager for crawling rules and configurations.
         */
        @Resource
        protected RuleManager ruleManager;
    
        /**
         * Container for managing crawler components.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            StatefulTransformer transformer = new StatefulTransformer("statefulTransformer");
    
            // Process multiple URLs
            String[] urls = { "http://example1.com", "http://example2.com", "http://example3.com" };
    
            for (String url : urls) {
                ResponseData responseData = new ResponseData();
                responseData.setUrl(url);
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java

            List<Rule> rules = new ArrayList<>();
    
            // Create rules with unique IDs
            for (int i = 0; i < 100; i++) {
                TestRule rule = new TestRule("rule_" + i, new TestResponseProcessor("processor_" + i), true);
                rules.add(rule);
            }
    
            // Verify all IDs are unique
            for (int i = 0; i < rules.size(); i++) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 22.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

            this.path = path;
        }
    
        /**
         * Gets the replacement string for question marks in URLs.
         *
         * @return the question mark replacement string
         */
        public String getQuestionStr() {
            return questionStr;
        }
    
        /**
         * Sets the replacement string for question marks in URLs.
         *
         * @param questionStr the question mark replacement string to set
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
Back to top