Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 1 - 10 of 135 for Crawler (0.05 seconds)

  1. src/main/java/org/codelibs/fess/exec/Crawler.java

     * </pre>
     */
    public class Crawler {
    
        /**
         * Creates a new instance of Crawler.
         */
        public Crawler() {
            // Default constructor
        }
    
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(Crawler.class);
    
        /** Thread name for web and file system crawling process. */
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Mar 26 02:24:08 GMT 2026
    - 32.4K bytes
    - Click Count (0)
  2. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/container/LastaCrawlerContainerTest.java

            // Verify that multiple components can be retrieved
            final Crawler crawler1 = crawlerContainer.getComponent("crawler");
            final Crawler crawler2 = crawlerContainer.getComponent("crawler");
    
            // They should be different instances (prototype scope)
            assertNotNull(crawler1);
            assertNotNull(crawler2);
        }
    
        @Test
        public void test_isLastaCrawlerContainer() {
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Fri Jan 16 13:52:11 GMT 2026
    - 2.7K bytes
    - Click Count (0)
  3. src/main/resources/fess_indices/fess_config.scheduled_job/scheduled_job.bulk

    {"index":{"_index":"fess_config.scheduled_job","_id":"default_crawler"}}
    {"name":"Default Crawler","target":"all","cronExpression":"0 0 * * *","scriptType":"groovy","scriptData":"return container.getComponent(\"crawlJob\").logLevel(\"info\").gcLogging().execute(executor);","jobLogging":true,"crawler":true,"available":true,"sortOrder":1,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}
    {"index":{"_index":"fess_config.scheduled_job","_id":"suggest_indexer"}}
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Thu Mar 26 02:24:08 GMT 2026
    - 5K bytes
    - Click Count (0)
  4. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler1.getCrawlerContext().setNumOfThread(numOfThread);
    
                final Crawler crawler2 = getComponent(Crawler.class);
                crawler2.setBackground(true);
                ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
                crawler2.addUrl(url2);
                crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Jan 15 01:11:43 GMT 2026
    - 7.8K bytes
    - Click Count (0)
  5. CLAUDE.md

    **DI Config**: `fess-crawler-lasta/src/main/resources/`
    - `crawler.xml` (root), `crawler/client.xml`, `crawler/extractor.xml`, `crawler/rule.xml`, `crawler/transformer.xml`, `crawler/transformer_basic.xml`
    - `crawler/mimetype.xml`, `crawler/encoding.xml`, `crawler/robotstxt.xml`, `crawler/sitemaps.xml`
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Mar 12 03:39:20 GMT 2026
    - 8.1K bytes
    - Click Count (0)
  6. fess-crawler-lasta/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                crawler1.addUrl(url1);
                crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
                crawler1.getCrawlerContext().setNumOfThread(numOfThread);
    
                final Crawler crawler2 = crawlerContainer.getComponent("crawler");
                crawler2.setSessionId(crawler2.getSessionId() + "2");
                crawler2.setBackground(true);
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Thu Jan 15 01:11:43 GMT 2026
    - 13.1K bytes
    - Click Count (0)
  7. src/test/java/org/codelibs/fess/exec/CrawlerTest.java

            Crawler.Options options = new Crawler.Options();
            options.sessionId = "test-session";
            options.fileConfigIds = "file1";
    
            int result = crawler.doCrawl(options);
            assertEquals(Constants.EXIT_OK, result);
        }
    
        @Test
        public void test_doCrawl_withDataConfigIds() {
            Crawler.Options options = new Crawler.Options();
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Mar 13 23:01:26 GMT 2026
    - 30.8K bytes
    - Click Count (0)
  8. src/test/java/org/codelibs/fess/helper/PluginHelperTest.java

            assertTrue(pluginHelper.isExcludedName(ArtifactType.CRAWLER, "fess-crawler-lasta"));
            assertTrue(pluginHelper.isExcludedName(ArtifactType.CRAWLER, "fess-crawler-parent"));
            assertTrue(pluginHelper.isExcludedName(ArtifactType.CRAWLER, "fess-crawler-playwright"));
            assertTrue(pluginHelper.isExcludedName(ArtifactType.CRAWLER, "fess-crawler-webdriver"));
    
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Fri Mar 13 23:01:26 GMT 2026
    - 22.6K bytes
    - Click Count (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

     */
    package org.codelibs.fess.crawler;
    
    import java.util.Set;
    import java.util.concurrent.atomic.AtomicInteger;
    import java.util.concurrent.atomic.AtomicLong;
    
    import org.codelibs.core.collection.LruHashSet;
    import org.codelibs.fess.crawler.filter.UrlFilter;
    import org.codelibs.fess.crawler.interval.IntervalController;
    import org.codelibs.fess.crawler.rule.RuleManager;
    
    /**
    Created: Sun Apr 12 03:50:13 GMT 2026
    - Last Modified: Wed Dec 31 09:07:38 GMT 2025
    - 9K bytes
    - Click Count (0)
  10. README.md

    * [LastaFlute](https://github.com/lastaflute/lastaflute "LastaFlute"): Web Framework
    * [Lasta Job](https://github.com/lastaflute/lasta-job "Lasta Job"): Job Scheduler
    * [Fess Crawler](https://github.com/codelibs/fess-crawler "Fess Crawler"): Web Crawler
    Created: Tue Mar 31 13:07:34 GMT 2026
    - Last Modified: Sat Feb 14 03:19:23 GMT 2026
    - 7.8K bytes
    - Click Count (2)
Back to Top