Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 48 for Crawling (0.06 sec)

  1. README.md

    [![Java CI with Maven](https://github.com/codelibs/fess-crawler/actions/workflows/maven.yml/badge.svg)](https://github.com/codelibs/fess-crawler/actions/workflows/maven.yml)
    =======
    
    ## Overview
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Sat Jun 19 01:09:45 UTC 2021
    - 286 bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java

            }
    
            if (cae.isDebugEnabled()) {
                logger.debug("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
            } else if (cae.isInfoEnabled()) {
                logger.info("[{}] {}", failureUrlId, cae.getMessage());
            } else if (cae.isWarnEnabled()) {
                logger.warn("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Fri Oct 11 21:11:58 UTC 2024
    - 7.3K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/job/PurgeLogJob.java

            final StringBuilder resultBuf = new StringBuilder();
    
            // purge crawling sessions
            try {
                crawlingInfoService.deleteBefore(systemHelper.getCurrentTimeAsLong());
            } catch (final Exception e) {
                logger.error("Failed to purge crawling sessions.", e);
                resultBuf.append(e.getMessage()).append("\n");
            }
    
            // purge search logs
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Thu Feb 22 01:37:57 UTC 2024
    - 3.7K bytes
    - Viewed (0)
  4. README.md

    You can register crawling targets in the Admin UI on the (Web, File, Data Store) crawler configuration pages, and then start the Crawler manually on the [Scheduler page](https://fess.codelibs.org/14.17/admin/scheduler-guide.html).
    
    ## Migration from another search provider
    
    Please see [MIGRATION.md](MIGRATION.md).
    
    ## Data Store
    
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Sat Oct 12 07:19:47 UTC 2024
    - 7.3K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        protected ThreadLocal<String[]> sitemapsLocal = new ThreadLocal<>();
    
        /** The number of a thread */
        protected int numOfThread = 10;
    
        protected int maxThreadCheckCount = 20;
    
        /** a max depth for crawling. -1 is no depth check. */
        protected int maxDepth = -1;
    
        /** a max count to access urls. 0 is no limit to access it. */
        protected long maxAccessCount = 0;
    
        public String getSessionId() {
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Sat Oct 12 01:40:57 UTC 2024
    - 4.5K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/helper/PathMappingHelper.java

            if (sessionId == null) {
                return null;
            }
            return pathMappingMap.get(sessionId);
        }
    
        public String replaceUrl(final String sessionId, final String url) { // for crawling
            final List<PathMapping> pathMappingList = getPathMappingList(sessionId);
            if (pathMappingList == null) {
                return url;
            }
            return replaceUrl(pathMappingList, url);
        }
    
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Thu Feb 22 01:53:18 UTC 2024
    - 7.6K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

    import org.codelibs.fess.crawler.service.UrlQueueService;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import jakarta.annotation.Resource;
    
    /**
     * Crawler manages/controls a crawling information.
     *
     * @author shinsuke
     *
     */
    public class Crawler implements Runnable, AutoCloseable {
    
        private static final Logger logger = LoggerFactory.getLogger(Crawler.class);
    
        @Resource
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 7.9K bytes
    - Viewed (0)
  8. test-site/app/models/ContentsCreator.java

            Queue<String> queue = getUrls();
    
            String url;
            int count = 0;
            while((url = queue.poll()) != null && count < max) {
    
                try {
                    Logger.info("crawling: " + url);
                    HttpUriRequest request = new HttpGet(url);
                    HttpResponse response = httpClient.execute(request);
                    if(response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
    Registered: Fri Nov 08 09:08:12 UTC 2024
    - Last Modified: Fri Nov 06 08:48:32 UTC 2015
    - 3.7K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/DefaultResponseProcessor.java

                    if (logger.isDebugEnabled()) {
                        logger.debug("Canceled urlQueue: {}", urlQueue);
                    }
                    // cancel crawling
                    crawlerContext.decrementAndGetAccessCount();
                    final List<UrlQueue<?>> newUrlQueueList = new ArrayList<>();
                    newUrlQueueList.add(urlQueue);
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 9K bytes
    - Viewed (0)
  10. src/test/java/org/codelibs/fess/it/admin/CrawlerLogTests.java

        }
    
        @Test
        void searchListTest() {
            logger.info("start searchListTest");
            testReadSearchList();
            testDeleteSearchList();
        }
    
        /**
         * Methods for a Web Crawling Job
         * */
        private static void createWebConfig() {
            final Map<String, Object> requestBody = new HashMap<>();
            final String urls = "https://www.codelibs.org/" + "\n" + "http://failure.url";
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Thu Feb 22 01:37:57 UTC 2024
    - 9.1K bytes
    - Viewed (0)
Back to top