Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 14 for web (0.17 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/Rule.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.processor.ResponseProcessor;
    
    /**
     * The Rule interface defines the contract for implementing rules that can be applied to
     * response data in a web crawler. Implementations of this interface should provide logic
     * to determine if a given response data matches the rule, retrieve the rule's identifier,
     * and obtain the associated response processor.
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.7K bytes
    - Viewed (0)
  2. README.md

    ## Overview
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/RobotsTxtException.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * RobotsTxtException is an exception class that represents an error related to robots.txt processing during web crawling.
     * It extends CrawlerSystemException and provides constructors to create instances with a message and/or a cause.
     *
     */
    public class RobotsTxtException extends CrawlerSystemException {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.5K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/BinaryTransformer.java

     * into a ResultData object, and for retrieving the binary data from an AccessResultData object.
     * It extends the AbstractTransformer class.
     *
     * <p>
     * This transformer extracts the binary content from the response body of a web resource,
     * stores it as a byte array in the ResultData, and provides a method to retrieve this data
     * as a ByteArrayInputStream.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/IntervalController.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.interval;
    
    /**
     * The {@code IntervalController} interface defines methods for controlling
     * the delay intervals in a web crawler. It includes constants representing
     * different types of processing states and a method to introduce a delay
     * based on the type of processing.
     * <p>
     * Constants:
     * </p>
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClient.java

    package org.codelibs.fess.crawler.client;
    
    import java.util.Map;
    
    import org.codelibs.fess.crawler.entity.RequestData;
    import org.codelibs.fess.crawler.entity.ResponseData;
    
    /**
     * Interface representing a client for a web crawler.
     * This client is responsible for executing requests and handling responses.
     * It extends {@link AutoCloseable} to allow for resource management.
     */
    public interface CrawlerClient extends AutoCloseable {
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/UrlQueue.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.entity;
    
    /**
     * The UrlQueue interface represents a queue of URLs to be processed by a web crawler.
     * It provides methods to get and set various properties of a URL queue entry.
     *
     * @param <IDTYPE> the type of the identifier for the URL queue entry
     */
    public interface UrlQueue<IDTYPE> {
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 4.3K bytes
    - Viewed (0)
  8. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    				"application/vnd.airzip.filesecure.azs",
    				"application/vnd.amazon.ebook",
    				"application/vnd.americandynamics.acc",
    				"application/vnd.amiga.ami",
    				"application/vnd.anser-web-certificate-issue-initiation",
    				"application/vnd.anser-web-funds-transfer-initiation",
    				"application/vnd.antix.game-component",
    				"application/vnd.apple.installer+xml",
    				"application/vnd.apple.iwork",
    				"application/vnd.apple.keynote",
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Aug 01 21:40:30 UTC 2020
    - 49K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java

    import org.apache.logging.log4j.Logger;
    import org.codelibs.fess.crawler.container.CrawlerContainer;
    
    import jakarta.annotation.Resource;
    
    /**
     * Creates and manages crawler clients for web crawling operations.
     * This class handles the registration and loading of crawler client factories and their associated clients.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

         */
        public SitemapUrl() {
            super();
        }
    
        /**
         * URL of the page. This URL must begin with the protocol (such as http) and
         * end with a trailing slash, if your web server requires it. This value
         * must be less than 2,048 characters.
         */
        private String loc;
    
        /**
         * The date of last modification of the file. This date should be in W3C
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
Back to top