Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 54 for Crawling (0.38 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/LogHelperImpl.java

                logger.debug("Crawling access exception at url: {}", urlQueue.getUrl(), e);
            } else if (e.isInfoEnabled()) {
                logger.info(e.getMessage());
            } else if (e.isWarnEnabled()) {
                logger.warn("Crawling access exception at url: {}", urlQueue.getUrl(), e);
            } else if (e.isErrorEnabled()) {
                logger.error("Crawling access exception at url: {}", urlQueue.getUrl(), e);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 02:01:26 UTC 2025
    - 14K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    import jakarta.annotation.Resource;
    
    /**
     * The Crawler class is the main class for web crawling. It manages the crawling process,
     * including adding URLs to the queue, filtering URLs, managing crawler threads,
     * and handling the overall crawling lifecycle.
     *
     * <p>It implements the Runnable interface to be executed in a separate thread,
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 17K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/helper/DataIndexHelper.java

    /**
     * Helper class for managing data crawling operations in Fess.
     * This class coordinates the execution of data store crawling processes,
     * managing multiple concurrent crawling threads and handling the indexing
     * of crawled documents into the search engine.
     *
     * <p>The DataIndexHelper supports:</p>
     * <ul>
     *   <li>Concurrent crawling of multiple data configurations</li>
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 19K bytes
    - Viewed (0)
  4. src/main/resources/fess_label_en.properties

    labels.createdTime=Created Time
    labels.depth=Depth
    labels.excludedPaths=Excluded Paths for Crawling
    labels.excludedUrls=Excluded URLs for Crawling
    labels.excludedDocPaths=Excluded Paths for Searching
    labels.excludedDocUrls=Excluded URLs for Searching
    labels.hostname=Hostname
    labels.id=ID
    labels.includedPaths=Included Paths for Crawling
    labels.includedUrls=Included URLs for Crawling
    labels.includedDocPaths=Included Paths for Searching
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 44K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/app/web/admin/general/EditForm.java

        @Size(max = 10)
        public String thumbnail;
    
        /**
         * Types of crawling failures to ignore during crawling operations.
         * Specified failure types will not be logged or counted as errors.
         */
        @Size(max = 1000)
        public String ignoreFailureType;
    
        /**
         * Threshold for failure count before stopping crawling of a URL.
         * Set to -1 to disable the threshold check.
         */
        @Required
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 10.6K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/job/CrawlJob.java

        /**
         * The namespace identifier for the crawling session.
         * Used to organize and identify crawling activities in the system.
         * Defaults to the system crawling info name.
         */
        protected String namespace = Constants.CRAWLING_INFO_SYSTEM_NAME;
    
        /**
         * Array of web crawling configuration IDs to process.
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 19.6K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/exec/Crawler.java

     * <ul>
     * <li>Web crawling - crawls web sites and web content</li>
     * <li>File system crawling - crawls file systems and documents</li>
     * <li>Data store crawling - crawls databases and other data sources</li>
     * <li>Combined crawling - runs multiple crawling types simultaneously</li>
     * </ul>
     *
     * <p>Command line usage:
     * <pre>
     * java org.codelibs.fess.exec.Crawler [options...]
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 31.4K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/helper/CrawlingConfigHelper.java

    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;
    
    import jakarta.annotation.PostConstruct;
    
    /**
     * Helper class for managing crawling configurations.
     * Provides functionality to store, retrieve, and manage different types of crawling configurations
     * including web, file, and data configurations. Supports caching and session-based configuration management.
     */
    public class CrawlingConfigHelper {
    
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 19.5K bytes
    - Viewed (1)
  9. CLAUDE.md

    - Graceful degradation (e.g., robots.txt parsing continues on errors)
    
    ---
    
    ## Key Components
    
    ### Crawler (`Crawler.java`)
    
    Main orchestrator for crawling operations.
    
    **Key Methods**:
    ```java
    String execute()                // Start crawling, return session ID
    void addUrl(String url)         // Add URL to queue
    void cleanup(String sessionId)  // Clean up session
    void stop()                     // Stop gracefully
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/app/web/admin/wizard/AdminWizardAction.java

            return redirectWith(getClass(), moreUrl("crawlingConfigForm"));
        }
    
        /**
         * Creates a crawling configuration and proceeds to the start crawling form.
         *
         * @param form the form containing crawling configuration data
         * @return HTML response redirecting to the start crawling form
         */
        @Execute
        @Secured({ ROLE })
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 16.3K bytes
    - Viewed (0)
Back to top