Search Options

Results per page
Sort
Preferred Languages
Advance

Results 21 - 30 of 1,085 for crawled (0.14 sec)

  1. src/main/java/org/codelibs/fess/helper/DataIndexHelper.java

        /**
         * Interval in milliseconds between crawler thread executions.
         * Used to control the rate at which new crawler threads are started
         * and the frequency of status checks.
         */
        protected long crawlingExecutionInterval = Constants.DEFAULT_CRAWLING_EXECUTION_INTERVAL;
    
        /**
         * Thread priority for crawler threads.
         * Defaults to normal thread priority.
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 19K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/util/PrunedTag.java

     * This class defines tag patterns that match HTML elements based on tag name, CSS class, ID, or custom attributes.
     * It is used to identify and remove unwanted HTML elements from crawled documents.
     */
    public class PrunedTag {
        /** The HTML tag name to match (e.g., "div", "span", "p") */
        private final String tag;
        /** The ID attribute value to match */
        private String id;
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Mon Nov 24 04:23:08 UTC 2025
    - 7.1K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java

    import org.codelibs.fess.app.service.FailureUrlService;
    import org.codelibs.fess.crawler.builder.RequestDataBuilder;
    import org.codelibs.fess.crawler.client.CrawlerClient;
    import org.codelibs.fess.crawler.entity.RequestData;
    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.entity.UrlQueue;
    import org.codelibs.fess.crawler.log.LogType;
    import org.codelibs.fess.exception.ContainerNotAvailableException;
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 19.5K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlQueueServiceImpl.java

    import org.codelibs.fess.crawler.entity.UrlQueue;
    import org.codelibs.fess.crawler.entity.UrlQueueImpl;
    import org.codelibs.fess.crawler.helper.MemoryDataHelper;
    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlQueueService} interface.
     * This class provides methods for managing a queue of URLs to be crawled,
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/app/service/FailureUrlService.java

         *
         * @param crawlingConfig the crawling configuration associated with the failure
         * @param errorName the name/type of the error that occurred
         * @param url the URL that failed to be crawled
         * @param e the exception that caused the failure
         * @return the stored or updated FailureUrl entity, or null if the exception should be ignored
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 9.2K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/helper/ProtocolHelper.java

    /**
     * Helper class for managing and validating URL protocols in Fess crawling system.
     * This class handles the initialization and validation of web and file protocols
     * used by the crawler to determine which URLs can be crawled.
     */
    public class ProtocolHelper {
        private static final Logger logger = LogManager.getLogger(ProtocolHelper.class);
    
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 12.4K bytes
    - Viewed (1)
  7. src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java

    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.Constants;
    import org.codelibs.fess.crawler.entity.AccessResult;
    import org.codelibs.fess.crawler.entity.AccessResultData;
    import org.codelibs.fess.crawler.entity.UrlQueue;
    import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
    import org.codelibs.fess.crawler.util.FieldConfigs;
    import org.codelibs.fess.mylasta.direction.FessConfig;
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 14.1K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/exec/Crawler.java

     * </pre>
     */
    public class Crawler {
    
        /**
         * Creates a new instance of Crawler.
         */
        public Crawler() {
            // Default constructor
        }
    
        /** Logger instance for this class. */
        private static final Logger logger = LogManager.getLogger(Crawler.class);
    
        /** Thread name for web and file system crawling process. */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 31.4K bytes
    - Viewed (0)
  9. samples/crawler/src/main/java/okhttp3/sample/Crawler.java

        OkHttpClient client = new OkHttpClient.Builder()
            .cache(cache)
            .callTimeout(5, TimeUnit.SECONDS)
            .build();
    
        Crawler crawler = new Crawler(client, queueLimit, hostLimit);
        crawler.queue.add(HttpUrl.get(args[1]));
        crawler.parallelDrainQueue(threadCount);
      }
    Registered: Fri Dec 26 11:42:13 UTC 2025
    - Last Modified: Wed Jul 23 00:58:06 UTC 2025
    - 5K bytes
    - Viewed (0)
  10. fess-crawler-lasta/src/main/resources/crawler.xml

    <components namespace="fessCrawler">
    	<include path="crawler/container.xml"/>
    	<include path="crawler/client.xml"/>
    	<include path="crawler/rule.xml"/>
    	<include path="crawler/filter.xml"/>
    	<include path="crawler/interval.xml"/>
    	<include path="crawler/extractor.xml"/>
    	<include path="crawler/mimetype.xml"/>
    	<include path="crawler/encoding.xml"/>
    	<include path="crawler/urlconverter.xml"/>
    	<include path="crawler/log.xml"/>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Tue Nov 28 13:40:25 UTC 2017
    - 1.7K bytes
    - Viewed (0)
Back to top