Burles - Code Search

fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleTest.java

        List<Rule> rules = new ArrayList<>();

        // Create rules with unique IDs
        for (int i = 0; i < 100; i++) {
            TestRule rule = new TestRule("rule_" + i, new TestResponseProcessor("processor_" + i), true);
            rules.add(rule);
        }

        // Verify all IDs are unique
        for (int i = 0; i < rules.size(); i++) {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Wed Sep 03 14:42:53 UTC 2025

- 22.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/DefaultIntervalController.java

 * Default implementation of the IntervalController.
 * This class provides a default way to manage delays between crawler operations.
 * It allows setting delays before processing, after processing, when no URLs are in the queue,
 * and when waiting for new URLs.
 * The delays are configurable via constructor parameters.
 *
 */
public class DefaultIntervalController extends AbstractIntervalController {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 3.4K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

        this.path = path;
    }

    /**
     * Returns the string used as the replacement for question marks in URLs.
     *
     * @return the currently configured question mark replacement string
     */
    public String getQuestionStr() {
        return this.questionStr;
    }

    /**
     * Sets the replacement string for question marks in URLs.
     *
     * @param questionStr the question mark replacement string to set
     */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 11.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlFilterServiceImpl.java

import org.codelibs.fess.crawler.service.UrlFilterService;

import jakarta.annotation.Resource;

/**
 * Implementation of the {@link UrlFilterService} interface.
 * This class provides methods for managing URL filtering rules,
 * including adding include and exclude URL patterns, deleting patterns,
 * and retrieving lists of compiled URL patterns. It utilizes a
 * {@link MemoryDataHelper} to store and manage the URL patterns in memory.
 *
 */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 4.2K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

    /**
     * Sets whether to use robots.txt disallow rules.
     * The given value is stored in this instance's {@code useRobotsTxtDisallows} field.
     *
     * @param useRobotsTxtDisallows {@code true} to use disallow rules, {@code false} otherwise
     */
    public void setUseRobotsTxtDisallows(final boolean useRobotsTxtDisallows) {
        this.useRobotsTxtDisallows = useRobotsTxtDisallows;
    }

    /**
     * Sets whether to use robots.txt allow rules.
     *

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 52.2K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java

 * This class holds information about a single Sitemap, including its location and last modification timestamp.
 * It implements the {@link Sitemap} interface.
 *
 * <p>
 * A Sitemap file provides search engines with a list of URLs available for crawling.
 * This class encapsulates the essential attributes of a Sitemap entry, allowing for efficient management
 * and processing of Sitemap data.
 * </p>
 *
 * <p>

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 4.4K bytes

- Viewed (1)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java

 * <ul>
 *   <li>Static passwords configured via {@link #addPassword(String, String)}</li>
 *   <li>Dynamic passwords provided through extraction parameters</li>
 * </ul>
 *
 * <p>Passwords are matched against URLs or resource names using regular expression patterns.
 * The extractor first tries to match against the URL, then falls back to the resource name if available.
 *
 * @author shinsuke
 */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 5.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/log/LogType.java

    REDIRECT_LOCATION,
    /** Indicates processing a response. */
    PROCESS_RESPONSE,
    /** Indicates the crawling process has finished. */
    FINISHED_CRAWLING,
    /** Indicates processing child URLs due to an exception. */
    PROCESS_CHILD_URLS_BY_EXCEPTION,
    /** Indicates processing a child URL due to an exception. */
    PROCESS_CHILD_URL_BY_EXCEPTION,
    /** Indicates an access exception during crawling. */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 2.4K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/DefaultResponseProcessor.java

        }
        return true;
    }

    /**
     * Stores child URLs found in the response data.
     *
     * @param crawlerContext the crawler context
     * @param childUrlList the set of child URLs
     * @param url the parent URL
     * @param depth the depth of the child URLs
     * @param encoding the encoding of the child URLs
     */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 12.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

     * influence the position of your URLs in a search engine's result pages.
     * Search engines may use this information when selecting between URLs on
     * the same site, so you can use this tag to increase the likelihood that
     * your most important pages are present in a search index.
     *
     * Also, please note that assigning a high priority to all of the URLs on

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 6.5K bytes

- Viewed (0)

Search Options