Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 71 - 80 of 167 for Crawling (0.53 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/RobotsTxtException.java

     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * RobotsTxtException is an exception class that represents an error related to robots.txt processing during web crawling.
     * It extends CrawlerSystemException and provides constructors to create instances with a message and/or a cause.
     *
     */
    public class RobotsTxtException extends CrawlerSystemException {
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.5K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/MimeTypeException.java

     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * MimeTypeException is a custom exception class that extends CrawlerSystemException.
     * It is used to indicate exceptions related to MIME type handling during the crawling process.
     * This exception can be thrown with a message, a cause, or both.
     */
    public class MimeTypeException extends CrawlerSystemException {
    
        private static final long serialVersionUID = 1L;
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  3. CLAUDE.md

    │   ├── exbhv/                 # Extended behaviors (repositories)
    │   └── exentity/              # Extended entities (domain models)
    ├── helper/                    # Cross-cutting utilities
    ├── crawler/                   # Crawling engine
    ├── sso/                       # SSO implementations
    └── ds/                        # Data store connectors
    
    src/main/resources/
    ├── fess_config.properties     # Main config
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ChildUrlsException.java

     */
    package org.codelibs.fess.crawler.exception;
    
    import java.util.Set;
    
    import org.codelibs.fess.crawler.entity.RequestData;
    
    /**
     * {@link ChildUrlsException} is thrown when child URLs are found during crawling.
     * It extends {@link CrawlerSystemException} and holds a set of {@link RequestData}
     * representing the child URLs that caused the exception.
     *
     */
    public class ChildUrlsException extends CrawlerSystemException {
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/app/web/admin/scheduler/EditForm.java

     * This form extends CreateForm to include fields necessary for updating existing scheduler entries,
     * including tracking information for optimistic locking.
     * Schedulers define automated jobs such as crawling and system maintenance tasks.
     *
     */
    public class EditForm extends CreateForm {
    
        /**
         * Creates a new EditForm instance.
         */
        public EditForm() {
            super();
        }
    
        /**
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/helper/ProtocolHelper.java

    import org.codelibs.fess.mylasta.direction.FessConfig;
    import org.codelibs.fess.util.ComponentUtil;
    
    import jakarta.annotation.PostConstruct;
    
    /**
     * Helper class for managing and validating URL protocols in Fess crawling system.
     * This class handles the initialization and validation of web and file protocols
     * used by the crawler to determine which URLs can be crawled.
     */
    public class ProtocolHelper {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 12.4K bytes
    - Viewed (1)
  7. src/main/resources/fess_config.properties

    crawler.document.fullstop.chars=\u002e\u06d4\u2e3c\u3002
    # Encoding for crawling data.
    crawler.crawling.data.encoding=UTF-8
    # Supported web protocols for crawling.
    crawler.web.protocols=http,https
    # Supported file protocols for crawling.
    crawler.file.protocols=file,smb,smb1,ftp,storage,s3,gcs
    # Pattern for environment variable keys in crawling data.
    crawler.data.env.param.key.pattern=^FESS_ENV_.*
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 54.8K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapNews.java

     * This can help Google News index your articles and display them in Google News search results.
     * </p>
     *
     * @see <a href="https://developers.google.com/search/docs/crawling-indexing/sitemaps/news-sitemap">Google News Sitemaps</a>
     */
    public class SitemapNews implements Serializable {
    
        private static final long serialVersionUID = 1L;
    
        /**
         * The name of the news publication.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 13:34:36 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/app/web/admin/fileauth/EditForm.java

     * including tracking information for optimistic locking and audit trails.
     * File authentication configurations define access control for file system crawling operations.
     *
     */
    public class EditForm extends CreateForm {
    
        /**
         * Creates a new EditForm instance.
         */
        public EditForm() {
            super();
        }
    
        /**
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 2.4K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/app/web/admin/webconfig/EditForm.java

    import org.lastaflute.web.validation.Required;
    import org.lastaflute.web.validation.theme.conversion.ValidateTypeFailure;
    
    import jakarta.validation.constraints.Size;
    
    /**
     * Form class for editing web crawling configurations in the admin interface.
     * This form extends CreateForm to include fields necessary for updating existing web config entries,
     * including tracking information for optimistic locking and audit trails.
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 2.3K bytes
    - Viewed (0)
Back to top