Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 49 for crawl (0.04 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

    import org.codelibs.fess.crawler.exception.RobotsTxtException;
    
    /**
     * Robots.txt Specifications:
     * <ul>
     * <li><a href=
     * "https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt"
     * >https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
     * </a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  2. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt

    User-agent: FessCrawler
    Disallow:           # allows all 
    
    User-agent: BruteBot
    Disallow: /
    Allow: /foo/bar/
    Crawl-delay: 1314000
    
    # welcome!
    User-agent: Googlebot
    Crawl-delay: 1
    
    User-agent: *
    Disallow: /private/
    Disallow: /help        # disallows /help.html, /help/index.html, etc.
    Allow: /help/faq.html
    Crawl-delay: 3
    
    User-agent: Crawler
    Disallow: /aaa
    
    User-agent: Crawler/1.0
    Disallow: /bbb
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 566 bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

         * command. Even though search engine crawlers may consider this information
         * when making decisions, they may crawl pages marked "hourly" less
         * frequently than that, and they may crawl pages marked "yearly" more
         * frequently than that. Crawlers may periodically crawl pages marked
         * "never" so that they can handle unexpected changes to those pages.
         */
        private String changefreq;
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java

        @Size(max = 200)
        public String name;
    
        /** The description of the file configuration (maximum 1000 characters). */
        @Size(max = 1000)
        public String description;
    
        /** The file paths to crawl (required, must be valid file URIs). */
        @Required
        @UriType(protocolType = ProtocolType.FILE)
        @CustomSize(maxKey = "form.admin.max.input.size")
        public String paths;
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 5.6K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/app/web/admin/dataconfig/EditForm.java

     * This form extends CreateForm to include fields necessary for updating existing data config entries,
     * including tracking information for optimistic locking and audit trails.
     * Data configs define how to crawl and extract data from databases, CSV files, and other data sources.
     *
     */
    public class EditForm extends CreateForm {
    
        /**
         * Creates a new EditForm instance.
         */
        public EditForm() {
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 2.3K bytes
    - Viewed (0)
  6. src/main/webapp/WEB-INF/view/admin/relatedquery/admin_relatedquery_edit.jsp

    <div class="wrapper">
        <jsp:include page="/WEB-INF/view/common/admin/header.jsp"></jsp:include>
        <jsp:include page="/WEB-INF/view/common/admin/sidebar.jsp">
            <jsp:param name="menuCategoryType" value="crawl"/>
            <jsp:param name="menuType" value="relatedQuery"/>
        </jsp:include>
        <div class="content-wrapper">
            <div class="content-header">
                <div class="container-fluid">
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Feb 13 07:47:04 UTC 2020
    - 4.8K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/app/web/api/admin/webconfig/SearchBody.java

        /**
         * Default constructor.
         */
        public SearchBody() {
            super();
        }
    
        /** Name of the web crawling configuration */
        public String name;
    
        /** URLs to crawl */
        public String urls;
    
        /** Description of the web crawling configuration */
        public String description;
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 1.2K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/app/web/api/admin/fileconfig/SearchBody.java

        /**
         * Default constructor.
         */
        public SearchBody() {
            super();
        }
    
        /** Name of the file crawling configuration */
        public String name;
    
        /** File paths to crawl */
        public String paths;
    
        /** Description of the file crawling configuration */
        public String description;
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 1.2K bytes
    - Viewed (0)
  9. src/main/webapp/WEB-INF/view/admin/pathmap/admin_pathmap_edit.jsp

    <div class="wrapper">
        <jsp:include page="/WEB-INF/view/common/admin/header.jsp"></jsp:include>
        <jsp:include page="/WEB-INF/view/common/admin/sidebar.jsp">
            <jsp:param name="menuCategoryType" value="crawl"/>
            <jsp:param name="menuType" value="pathMapping"/>
        </jsp:include>
        <div class="content-wrapper">
            <div class="content-header">
                <div class="container-fluid">
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Sun Feb 28 06:09:47 UTC 2021
    - 7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResultData.java

    import java.util.LinkedHashSet;
    import java.util.Set;
    import java.util.function.Function;
    
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    
    /**
     * This class represents the result data of a crawl.
     */
    public class ResultData implements Serializable {
        private static final long serialVersionUID = 1L;
    
        /** The name of the transformer. */
        protected String transformerName;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.7K bytes
    - Viewed (0)
Back to top