Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 490 for crawlers (0.15 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

         * Please note that the value of this tag is considered a hint and not a
         * command. Even though search engine crawlers may consider this information
         * when making decisions, they may crawl pages marked "hourly" less
         * frequently than that, and they may crawl pages marked "yearly" more
         * frequently than that. Crawlers may periodically crawl pages marked
         * "never" so that they can handle unexpected changes to those pages.
         */
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 4.9K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java

         * Datetime format.
         *
         * By providing the last modification timestamp, you enable search engine
         * crawlers to retrieve only a subset of the Sitemaps in the index i.e. a
         * crawler may only retrieve Sitemaps that were modified since a certain
         * date. This incremental Sitemap fetching mechanism allows for the rapid
         * discovery of new URLs on very large sites.
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 2.7K bytes
    - Viewed (0)
  3. docs/fr/README.md

    * [LastaFlute](https://github.com/lastaflute/lastaflute "LastaFlute") : Framework Web
    * [Lasta Job](https://github.com/lastaflute/lasta-job "Lasta Job") : Planificateur de tâches
    * [Fess Crawler](https://github.com/codelibs/fess-crawler "Fess Crawler") : Crawler Web
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Sat Oct 12 07:19:47 UTC 2024
    - 7.9K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/exec/Crawler.java

                if (logger.isDebugEnabled()) {
                    logger.debug("Crawler is stopped.", e);
                } else if (logger.isInfoEnabled()) {
                    logger.info("Crawler is stopped.");
                }
                exitCode = Constants.EXIT_FAIL;
            } catch (final Throwable t) {
                logger.error("Crawler does not work correctly.", t);
                exitCode = Constants.EXIT_FAIL;
            } finally {
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Fri Oct 11 21:20:39 UTC 2024
    - 24K bytes
    - Viewed (0)
  5. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler1.getCrawlerContext().setNumOfThread(numOfThread);
    
                Thread.sleep(100);
    
                final Crawler crawler2 = getComponent(Crawler.class);
                crawler2.setBackground(true);
                ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
                crawler2.addUrl(url2);
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Nov 07 04:44:10 UTC 2024
    - 7.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

     */
    package org.codelibs.fess.crawler;
    
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;
    
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.client.CrawlerClientFactory;
    import org.codelibs.fess.crawler.container.CrawlerContainer;
    import org.codelibs.fess.crawler.entity.AccessResult;
    import org.codelibs.fess.crawler.entity.UrlQueue;
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 7.9K bytes
    - Viewed (0)
  7. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler1.addUrl(url1);
                crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler1.getCrawlerContext().setNumOfThread(numOfThread);
    
                final Crawler crawler2 = crawlerContainer.getComponent("crawler");
                crawler2.setSessionId(crawler2.getSessionId() + "2");
                crawler2.setBackground(true);
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:47:32 UTC 2024
    - 11.9K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler1.addUrl(url1);
                crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler1.getCrawlerContext().setNumOfThread(numOfThread);
    
                final Crawler crawler2 = container.getComponent("crawler");
                crawler2.setSessionId(crawler2.getSessionId() + "2");
                crawler2.setBackground(true);
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 18K bytes
    - Viewed (0)
  9. src/main/resources/crawler_es+crawlerConfig.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" 
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<component name="crawlerConfig" class="org.codelibs.fess.crawler.util.FessCrawlerConfig">
    		<property name="queueIndex">"fess_crawler.queue"</property>
    		<property name="queueShards">5</property>
    		<property name="queueReplicas">1</property>
    Registered: Thu Oct 31 13:40:30 UTC 2024
    - Last Modified: Fri Dec 03 13:06:40 UTC 2021
    - 756 bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java

        /**
         * Add an url pattern as a target.
         *
         * @param urlPattern Regular expression that is crawled
         */
        void addInclude(String urlPattern);
    
        /**
         * Add an url pattern as a non-target.
         *
         * @param urlPattern Regular expression that is not crawled
         */
        void addExclude(String urlPattern);
    
        /**
         * Process an url when it's added as a seed url.
         *
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 1.6K bytes
    - Viewed (0)
Back to top