Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 90 for Crawling (0.04 sec)

  1. README.md

            
            // Configure crawling parameters
            crawler.addUrl("https://example.com");
            crawler.crawlerContext.setMaxAccessCount(100);
            crawler.crawlerContext.setNumOfThread(5);
            crawler.urlFilter.addInclude("https://example.com/.*");
            
            // Execute crawling
            String sessionId = crawler.execute();
            System.out.println("Crawling completed. Session ID: " + sessionId);
        }
    }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java

        }
    
        /**
         * Stores crawling information and parameters for the specified session.
         * Creates a new crawling info record if none exists or if create flag is true.
         * Also stores any accumulated information parameters and clears the info map.
         *
         * @param sessionId the session ID for the crawling information
         * @param create if true, creates a new crawling info regardless of existing records
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 15.2K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/helper/DataIndexHelper.java

    /**
     * Helper class for managing data crawling operations in Fess.
     * This class coordinates the execution of data store crawling processes,
     * managing multiple concurrent crawling threads and handling the indexing
     * of crawled documents into the search engine.
     *
     * <p>The DataIndexHelper supports:</p>
     * <ul>
     *   <li>Concurrent crawling of multiple data configurations</li>
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 18.9K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/exec/Crawler.java

     * <ul>
     * <li>Web crawling - crawls web sites and web content</li>
     * <li>File system crawling - crawls file systems and documents</li>
     * <li>Data store crawling - crawls databases and other data sources</li>
     * <li>Combined crawling - runs multiple crawling types simultaneously</li>
     * </ul>
     *
     * <p>Command line usage:
     * <pre>
     * java org.codelibs.fess.exec.Crawler [options...]
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 31K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/app/web/api/admin/crawlinginfo/ApiAdminCrawlinginfoAction.java

                    .status(ApiResult.Status.OK)
                    .result());
        }
    
        /**
         * Retrieves a specific crawling info log by ID.
         *
         * @param id the ID of the crawling info log to retrieve
         * @return JSON response containing the crawling info log data
         */
        // GET /api/admin/crawlinginfo/log/{id}
        @Execute
        public JsonResponse<ApiResult> get$log(final String id) {
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 6.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

     *   <li>Handling exceptions that may occur during the crawling process.</li>
     * </ol>
     *
     * <p>
     * The thread also manages the active thread count using {@code crawlerContext.activeThreadCountLock}
     * and provides methods for logging messages using {@link LogHelper}.
     * </p>
     *
     * <p>
     * The crawling process continues until the crawler status is {@link CrawlerStatus#DONE} or the
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java

        }
    
        /**
         * Adds a document to the search index by crawling the specified URL and processing the content.
         * This method handles recursive crawling with depth and access count limits, follows redirects,
         * and processes child URLs discovered during crawling.
         *
         * @param paramMap the data store parameters containing crawling configuration
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 28.9K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/indexer/IndexUpdater.java

        public void setSessionIdList(final List<String> sessionIdList) {
            this.sessionIdList = sessionIdList;
        }
    
        /**
         * Sets the flag indicating whether crawling should be finished.
         *
         * @param finishCrawling true if crawling should be finished, false otherwise
         */
        public void setFinishCrawling(final boolean finishCrawling) {
            this.finishCrawling = finishCrawling;
        }
    
        /**
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Tue Aug 19 14:09:36 UTC 2025
    - 32.7K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/app/web/admin/crawlinginfo/AdminCrawlinginfoAction.java

    import org.lastaflute.web.response.HtmlResponse;
    import org.lastaflute.web.response.render.RenderData;
    import org.lastaflute.web.ruts.process.ActionRuntime;
    
    import jakarta.annotation.Resource;
    
    /**
     * Admin action for Crawling Info management.
     *
     */
    public class AdminCrawlinginfoAction extends FessAdminAction {
    
        /**
         * Default constructor.
         */
        public AdminCrawlinginfoAction() {
            super();
        }
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/util/GsaConfigParser.java

    /**
     * Parser for Google Search Appliance (GSA) configuration files.
     * This SAX-based parser reads GSA XML configuration files and converts them into
     * Fess configuration objects including web crawling configurations, file crawling
     * configurations, and label types for access control.
     *
     * <p>The parser handles the following GSA configuration elements:
     * <ul>
     * <li>Collections with good/bad URL patterns</li>
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 21.5K bytes
    - Viewed (0)
Back to top