Search Options

Display Count
Sort
Preferred Language
Advanced Search

Results 31 - 40 of 140 for robust (0.09 seconds)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.codelibs.core.lang.StringUtil;
    
    /**
     * Represents a robots.txt file parser and handler.
     * This class manages the rules defined in a robots.txt file, including user agent directives,
     * allowed/disallowed paths, crawl delays, and sitemap URLs.
     *
     * <p>The robots.txt protocol is implemented according to the standard specification,
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Mon Nov 24 03:59:47 GMT 2025
    - 18.5K bytes
    - Click Count (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </ul>
     *
     * <p>References:</p>
     * <ul>
     * <li><a href="https://datatracker.ietf.org/doc/html/rfc9309">RFC 9309 - Robots Exclusion Protocol</a></li>
     * <li><a href="https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt">
     * Google's robots.txt Specification</a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Fri Nov 14 12:52:01 GMT 2025
    - 11.4K bytes
    - Click Count (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        }
    
        /**
         * Returns the set of robots.txt URLs.
         * @return The set of robots.txt URLs.
         */
        public Set<String> getRobotsTxtUrlSet() {
            return robotsTxtUrlSet;
        }
    
        /**
         * Sets the set of robots.txt URLs.
         * @param robotsTxtUrlSet The set of robots.txt URLs.
         */
        public void setRobotsTxtUrlSet(final Set<String> robotsTxtUrlSet) {
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sun Jul 06 02:13:03 GMT 2025
    - 8.9K bytes
    - Click Count (0)
  4. android-test/src/test/kotlin/okhttp/android/test/AndroidLoggingTest.kt

      val clientBuilder =
        OkHttpClient.Builder().connectionSpecs(listOf(ConnectionSpec.CLEARTEXT)).dns {
          throw UnknownHostException("shortcircuit")
        }
    
      val request = Request("http://google.com/robots.txt".toHttpUrl())
    
      @Test
      fun testHttpLoggingInterceptor() {
        val interceptor =
          HttpLoggingInterceptor().apply {
            level = HttpLoggingInterceptor.Level.BASIC
          }
    
    Created: Fri Dec 26 11:42:13 GMT 2025
    - Last Modified: Thu Aug 21 14:27:04 GMT 2025
    - 3.2K bytes
    - Click Count (0)
  5. okhttp/src/jvmTest/kotlin/okhttp3/TestTls13Request.kt

          "https://www.allizom.org/robots.txt",
          "https://tls13.crypto.mozilla.org/",
          "https://tls.ctf.network/robots.txt",
          "https://rustls.jbp.io/",
          "https://h2o.examp1e.net",
          "https://mew.org/",
          "https://tls13.baishancloud.com/",
          "https://tls13.akamai.io/",
          "https://swifttls.org/",
          "https://www.googleapis.com/robots.txt",
          "https://graph.facebook.com/robots.txt",
    Created: Fri Dec 26 11:42:13 GMT 2025
    - Last Modified: Thu May 22 14:39:30 GMT 2025
    - 3.6K bytes
    - Click Count (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                httpClientPropertyMap.put(name, value);
            }
        }
    
        /**
         * Processes robots.txt for the given URL.
         * This method fetches and parses the robots.txt file to extract disallow/allow rules
         * and sitemap information.
         *
         * @param url The URL to process robots.txt for
         */
        protected void processRobotsTxt(final String url) {
            if (StringUtil.isBlank(url)) {
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sun Nov 23 12:19:14 GMT 2025
    - 53.7K bytes
    - Click Count (0)
  7. okhttp-zstd/src/test/java/okhttp3/zstd/ZstdTestMain.kt

          .Builder()
          .addInterceptor(CompressionInterceptor(Zstd))
          .build()
    
      sendRequest("https://developers.facebook.com/docs/", client)
      sendRequest("https://www.facebook.com/robots.txt", client)
      sendRequest("https://www.instagram.com/robots.txt", client)
    }
    
    private fun sendRequest(
      url: String,
      client: OkHttpClient,
    ) {
      val req = Request.Builder().url(url).build()
    
      client.newCall(req).execute().use {
    Created: Fri Dec 26 11:42:13 GMT 2025
    - Last Modified: Tue Jul 29 20:01:04 GMT 2025
    - 1.3K bytes
    - Click Count (0)
  8. regression-test/src/androidTest/java/okhttp/regression/LetsEncryptTest.java

        }
    
        OkHttpClient client = builder.build();
    
        sendRequest(client, "https://valid-isrgrootx1.letsencrypt.org/robots.txt");
    
        try {
          sendRequest(client, "https://google.com/robots.txt");
          if (androidMorEarlier) {
            // will pass with default CAs on N or later
            fail();
          }
        } catch (SSLHandshakeException sslhe) {
    Created: Fri Dec 26 11:42:13 GMT 2025
    - Last Modified: Tue Nov 17 07:40:31 GMT 2020
    - 6.1K bytes
    - Click Count (0)
  9. okhttp/src/jvmTest/kotlin/okhttp3/CorrettoTest.kt

        client.newCall(request).execute().use {
          assertThat(it.protocol).isEqualTo(Protocol.HTTP_2)
          assertThat(it.handshake!!.tlsVersion).isEqualTo(TlsVersion.TLS_1_3)
        }
      }
    
      @Test
      @Disabled
      fun testGoogle() {
        assumeNetwork()
    
        val request = Request.Builder().url("https://google.com/robots.txt").build()
    
    Created: Fri Dec 26 11:42:13 GMT 2025
    - Last Modified: Fri Dec 27 13:39:56 GMT 2024
    - 2.1K bytes
    - Click Count (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

            // Add URLs to default set
            urlSet.add("http://example.com/robots.txt");
            urlSet.add("http://test.com/robots.txt");
            assertEquals(2, crawlerContext.getRobotsTxtUrlSet().size());
    
            // Set new set
            Set<String> newSet = new HashSet<>();
            newSet.add("http://new.com/robots.txt");
            crawlerContext.setRobotsTxtUrlSet(newSet);
    Created: Sat Dec 20 11:21:39 GMT 2025
    - Last Modified: Sat Sep 06 04:15:37 GMT 2025
    - 25.6K bytes
    - Click Count (0)
Back to Top