Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 237 for Crawled (0.36 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/UrlFilter.java

        /**
         * Adds a URL pattern as a crawl target.
         *
         * @param urlPattern regular expression for URLs that are crawled
         */
        void addInclude(String urlPattern);
    
        /**
         * Adds a URL pattern as a non-target.
         *
         * @param urlPattern regular expression for URLs that are not crawled
         */
        void addExclude(String urlPattern);
    
        /**
         * Process a URL when it's added as a seed URL.
         *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.6K bytes
    - Viewed (0)
  2. README.md

    });
    
    Crawler crawler = container.getComponent("crawler");
    crawler.addUrl("https://example.com");
    crawler.crawlerContext.setMaxAccessCount(500);
    String sessionId = crawler.execute();
    ```
    
    ### Background Crawling
    
    ```java
    // Configure for background execution
    crawler.setBackground(true);
    String sessionId = crawler.execute();
    
    // Check crawling status
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/Crawler.java

     *
     * <p>Example usage:
     * <pre>
     *   Crawler crawler = new Crawler();
     *   crawler.addUrl("http://example.com/");
     *   crawler.execute();
     *   crawler.close();
     * </pre>
     */
    public class Crawler implements Runnable, AutoCloseable {
    
        private static final Logger logger = LogManager.getLogger(Crawler.class);
    
        /**
         * Service for managing URL queues during crawling.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

    import org.codelibs.fess.crawler.entity.Sitemap;
    import org.codelibs.fess.crawler.entity.SitemapSet;
    import org.codelibs.fess.crawler.exception.ChildUrlsException;
    import org.codelibs.fess.crawler.helper.SitemapsHelper;
    import org.codelibs.fess.crawler.processor.ResponseProcessor;
    
    import jakarta.annotation.Resource;
    
    /**
     * A response processor implementation that handles sitemaps.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResponseData.java

            mimeType = contentType;
        }
    
        /**
         * Returns the URL currently stored on this object for the crawled resource.
         *
         * @return the stored URL
         */
        public String getUrl() {
            return this.url;
        }
    
        /**
         * Sets the URL of the crawled resource.
         *
         * @param url the URL to set
         */
        public void setUrl(final String url) {
            this.url = url;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 11.6K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExtractException.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * Exception thrown during the extraction process in the crawler.
     * This exception indicates a failure or error that occurred while extracting content from a crawled resource.
     * It extends {@link org.codelibs.fess.crawler.exception.CrawlerSystemException} and provides constructors
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

    import org.codelibs.core.io.CopyUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.AccessResultData;
    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.entity.ResultData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    
    /**
     * <p>
     * FileTransformer stores the content of a crawled resource as a file on the file system.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

    import org.apache.tika.metadata.Message;
    import org.apache.tika.metadata.TIFF;
    import org.apache.tika.metadata.TikaCoreProperties;
    import org.apache.tika.metadata.TikaMimeKeys;
    
    /**
     * Represents extracted data from a crawled resource, including content and metadata.
     */
    public class ExtractData
            implements TikaCoreProperties, CreativeCommons, Geographic, HttpHeaders, Message, ClimateForcast, TIFF, TikaMimeKeys, Serializable {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/UrlQueueImpl.java

     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.entity;
    
    /**
     * Implementation of the {@link UrlQueue} interface.
     * This class represents a URL to be crawled, storing its ID, session ID,
     * HTTP method, URL, metadata, encoding, parent URL, depth, last modified time,
     * creation time, and weight.
     *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.1K bytes
    - Viewed (0)
  10. fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/service/impl/OpenSearchUrlQueueService.java

    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.AccessResult;
    import org.codelibs.fess.crawler.entity.OpenSearchUrlQueue;
    import org.codelibs.fess.crawler.entity.UrlQueue;
    import org.codelibs.fess.crawler.exception.OpenSearchAccessException;
    import org.codelibs.fess.crawler.service.UrlQueueService;
    import org.codelibs.fess.crawler.util.OpenSearchCrawlerConfig;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 17K bytes
    - Viewed (1)
Back to top