Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 52 for Burles (0.09 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/UrlConvertHelper.java

    /**
     * Helper class for converting URLs based on a set of predefined rules.
     *
     * <p>This class provides functionality to convert URLs by replacing parts of the URL
     * based on a map of target strings and their corresponding replacements. It allows
     * adding new conversion rules, setting the entire conversion map, and converting
     * URLs using these rules.</p>
     *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.1K bytes
    - Viewed (0)
  2. README.md

    #### Content Processing Pipeline
    - **Extractors**: Content extraction from various formats
    - **Transformers**: Data transformation and enrichment
    - **Filters**: URL filtering with regex patterns
    - **Rules**: Content processing rules and validation
    
    ## Building and Testing
    
    ### Build Commands
    
    ```bash
    # Build all modules
    mvn clean install
    
    # Build without tests
    mvn clean install -DskipTests
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

            return null;
        }
    
        /**
         * Adds a directive to the robots.txt rules.
         * The user-agent pattern in the directive is converted to a regular expression pattern,
         * where '*' is replaced with '.*' for pattern matching, and stored case-insensitively.
         *
         * @param directive The directive to add to the robots.txt rules
         */
        public void addDirective(final Directive directive) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java

            testRule.setRuleId("middle");
            testRule.register(1);
    
            List<Rule> rules = ruleManager.getRules();
            assertEquals(3, rules.size());
            assertEquals("rule1", rules.get(0).getRuleId());
            assertEquals("middle", rules.get(1).getRuleId());
            assertEquals("rule2", rules.get(2).getRuleId());
        }
    
        /**
         * Test register method with last index
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 21.9K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

     *   <li>Extracting child URLs from the HTML content based on configured rules.</li>
     *   <li>Handling redirect URLs specified in the response headers.</li>
     * </ol>
     * <p>
     * The class also provides methods for configuring features and properties of the
     * underlying DOM parser, as well as defining rules for extracting child URLs
     * from specific HTML tags and attributes.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImpl.java

    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlFilter} interface.
     * This class provides functionality to filter URLs based on include and exclude patterns.
     * It uses a {@link UrlFilterService} to manage the URL filtering rules.
     * The class supports caching of include and exclude patterns for scenarios where a session ID is not available.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.2K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        }
    
        /**
         * Returns the set of robots.txt URLs.
         * @return The set of robots.txt URLs.
         */
        public Set<String> getRobotsTxtUrlSet() {
            return robotsTxtUrlSet;
        }
    
        /**
         * Sets the set of robots.txt URLs.
         * @param robotsTxtUrlSet The set of robots.txt URLs.
         */
        public void setRobotsTxtUrlSet(final Set<String> robotsTxtUrlSet) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java

            } catch (final ChildUrlsException e) {
                String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new);
                assertEquals(3, urls.length);
                assertEquals(baseUrl + "dir1/", urls[0]);
                assertEquals(baseUrl + "dir3/", urls[1]);
                assertEquals(baseUrl + "file1.txt", urls[2]);
            }
            try {
                smbClient.doGet(baseUrl + "dir1/");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 13.7K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/RuleManager.java

     */
    package org.codelibs.fess.crawler.rule;
    
    import org.codelibs.fess.crawler.entity.ResponseData;
    
    /**
     * The RuleManager interface provides methods to manage rules for processing response data.
     * It allows adding, retrieving, and removing rules, as well as checking for their existence.
     */
    public interface RuleManager {
    
        /**
         * Retrieves the rule associated with the given response data.
         *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleManagerTest.java

            List<Rule> rules = ruleManager.getRules();
            assertEquals(4, rules.size());
            assertEquals("rule1", rules.get(0).getRuleId());
            assertEquals("rule2", rules.get(1).getRuleId());
            assertEquals("rule3", rules.get(2).getRuleId());
            assertEquals("rule4", rules.get(3).getRuleId());
        }
    
        /**
         * Test adding duplicate rules
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 23.8K bytes
    - Viewed (0)
Back to top