Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 11 - 20 of 156 for roots (1 sec)

  1. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt

    Shinsuke Sugaya <******@****.***> 1444529815 +0900
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 566 bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

        /** XPath expression for extracting robots content from meta tags */
        private static final String META_NAME_ROBOTS_CONTENT = "//META[@name=\"robots\" or @name=\"ROBOTS\"]/@content";
    
        /** Robots tag value indicating no indexing or following */
        private static final String ROBOTS_TAG_NONE = "none";
    
        /** Robots tag value indicating no indexing */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 54.6K bytes
    - Viewed (0)
  3. src/main/java/jcifs/internal/dfs/DfsReferralDataImpl.java

            dr.domain = n.getDomain();
            return dr;
        }
    
        /*
         * Split DFS path like \fs1.example.com\root5\link2\foo\bar.txt into at
         * most 3 components (not including the first index which is always empty):
         * result[0] = ""
         * result[1] = "fs1.example.com"
         * result[2] = "root5"
         * result[3] = "link2\foo\bar.txt"
         */
    Registered: Sat Dec 20 13:44:44 UTC 2025
    - Last Modified: Sat Aug 16 01:32:48 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  4. CLAUDE.md

    - **Testing**: JUnit 4, UTFlute, Mockito 5.7.0
    - **Storage**: In-memory (default), OpenSearch (optional)
    
    ### Protocols
    
    - **HTTP/HTTPS**: Full crawling, cookies, auth, robots.txt
    - **File**: Local/network file systems
    - **FTP**: With authentication
    - **SMB/CIFS**: Windows shares (SMB1/SMB2+)
    - **Storage**: MinIO/S3-compatible
    
    ### Content Formats
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            robotsTxtHelper = container.getComponent("robotsTxtHelper");
        }
    
        public void testParse() {
            RobotsTxt robotsTxt;
            final InputStream in = RobotsTxtHelperTest.class.getResourceAsStream("robots.txt");
            try {
                robotsTxt = robotsTxtHelper.parse(in);
            } finally {
                CloseableUtil.closeQuietly(in);
            }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.codelibs.core.lang.StringUtil;
    
    /**
     * Represents a robots.txt file parser and handler.
     * This class manages the rules defined in a robots.txt file, including user agent directives,
     * allowed/disallowed paths, crawl delays, and sitemap URLs.
     *
     * <p>The robots.txt protocol is implemented according to the standard specification,
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 18.5K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/core/io/Traverser.java

         * <p>
         * If a root package is specified at instance construction, only classes under the root package are targeted.
         * </p>
         *
         * @param handler the handler to process classes
         */
        void forEach(ClassHandler handler);
    
        /**
         * Searches for resources handled by this instance and calls the handler for each resource.
         * <p>
    Registered: Sat Dec 20 08:55:33 UTC 2025
    - Last Modified: Thu Jun 19 09:12:22 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </ul>
     *
     * <p>References:</p>
     * <ul>
     * <li><a href="https://datatracker.ietf.org/doc/html/rfc9309">RFC 9309 - Robots Exclusion Protocol</a></li>
     * <li><a href="https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt">
     * Google's robots.txt Specification</a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

            // Add URLs to default set
            urlSet.add("http://example.com/robots.txt");
            urlSet.add("http://test.com/robots.txt");
            assertEquals(2, crawlerContext.getRobotsTxtUrlSet().size());
    
            // Set new set
            Set<String> newSet = new HashSet<>();
            newSet.add("http://new.com/robots.txt");
            crawlerContext.setRobotsTxtUrlSet(newSet);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 25.6K bytes
    - Viewed (0)
  10. src/main/java/jcifs/smb1/util/transport/TransportException.java

        }
    
        /**
         * Constructs a new TransportException with the specified root cause.
         *
         * @param rootCause the root cause of this exception
         */
        public TransportException(final Throwable rootCause) {
            this.rootCause = rootCause;
        }
    
        /**
         * Constructs a new TransportException with the specified detail message and root cause.
         *
         * @param msg the detail message
    Registered: Sat Dec 20 13:44:44 UTC 2025
    - Last Modified: Sat Aug 16 01:32:48 UTC 2025
    - 1.8K bytes
    - Viewed (0)
Back to top