robust - Code Search

fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

import java.util.Map;
import java.util.regex.Pattern;

import org.codelibs.core.lang.StringUtil;

/**
 * Represents a robots.txt file parser and handler.
 * This class manages the rules defined in a robots.txt file, including user agent directives,
 * allowed/disallowed paths, crawl delays, and sitemap URLs.
 *
 * <p>The robots.txt protocol is implemented according to the standard specification,

Created: Sat Dec 20 11:21:39 GMT 2025

- Last Modified: Mon Nov 24 03:59:47 GMT 2025

- 18.5K bytes

- Click Count (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

 * </ul>
 *
 * <p>References:</p>
 * <ul>
 * <li><a href="https://datatracker.ietf.org/doc/html/rfc9309">RFC 9309 - Robots Exclusion Protocol</a></li>
 * <li><a href="https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt">
 * Google's robots.txt Specification</a></li>
 * </ul>
 *
 * @author bowez
 * @author shinsuke
 *
 */
public class RobotsTxtHelper {

Created: Sat Dec 20 11:21:39 GMT 2025

- Last Modified: Fri Nov 14 12:52:01 GMT 2025

- 11.4K bytes

- Click Count (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

    }

    /**
     * Returns the set of robots.txt URLs.
     *
     * <p>The value of {@code robotsTxtUrlSet} is handed back as-is; this method
     * makes no copy and applies no wrapping.</p>
     *
     * @return The set of robots.txt URLs.
     */
    public Set<String> getRobotsTxtUrlSet() {
        return robotsTxtUrlSet;
    }

    /**
     * Sets the set of robots.txt URLs.
     * @param robotsTxtUrlSet The set of robots.txt URLs.
     */
    public void setRobotsTxtUrlSet(final Set<String> robotsTxtUrlSet) {

Created: Sat Dec 20 11:21:39 GMT 2025

- Last Modified: Sun Jul 06 02:13:03 GMT 2025

- 8.9K bytes

- Click Count (0)

github.com/square/okhttp

android-test/src/test/kotlin/okhttp/android/test/AndroidLoggingTest.kt

  // Builder limited to cleartext connection specs, with a DNS implementation that
  // always throws UnknownHostException("shortcircuit").
  // NOTE(review): presumably this makes calls fail at name resolution without
  // touching the network — confirm against the tests that use it.
  val clientBuilder =
    OkHttpClient.Builder().connectionSpecs(listOf(ConnectionSpec.CLEARTEXT)).dns {
      throw UnknownHostException("shortcircuit")
    }

  // Plain-HTTP request for google.com's robots.txt.
  val request = Request("http://google.com/robots.txt".toHttpUrl())

  @Test
  fun testHttpLoggingInterceptor() {
    val interceptor =
      HttpLoggingInterceptor().apply {
        level = HttpLoggingInterceptor.Level.BASIC
      }

Created: Fri Dec 26 11:42:13 GMT 2025

- Last Modified: Thu Aug 21 14:27:04 GMT 2025

- 3.2K bytes

- Click Count (0)

github.com/square/okhttp

okhttp/src/jvmTest/kotlin/okhttp3/TestTls13Request.kt

      "https://www.allizom.org/robots.txt",
      "https://tls13.crypto.mozilla.org/",
      "https://tls.ctf.network/robots.txt",
      "https://rustls.jbp.io/",
      "https://h2o.examp1e.net",
      "https://mew.org/",
      "https://tls13.baishancloud.com/",
      "https://tls13.akamai.io/",
      "https://swifttls.org/",
      "https://www.googleapis.com/robots.txt",
      "https://graph.facebook.com/robots.txt",

Created: Fri Dec 26 11:42:13 GMT 2025

- Last Modified: Thu May 22 14:39:30 GMT 2025

- 3.6K bytes

- Click Count (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

            httpClientPropertyMap.put(name, value);
        }
    }

    /**
     * Processes robots.txt for the given URL.
     * This method fetches and parses the robots.txt file to extract disallow/allow rules
     * and sitemap information.
     *
     * @param url The URL to process robots.txt for
     */
    protected void processRobotsTxt(final String url) {
        if (StringUtil.isBlank(url)) {

Created: Sat Dec 20 11:21:39 GMT 2025

- Last Modified: Sun Nov 23 12:19:14 GMT 2025

- 53.7K bytes

- Click Count (0)

github.com/square/okhttp

okhttp-zstd/src/test/java/okhttp3/zstd/ZstdTestMain.kt

      .Builder()
      .addInterceptor(CompressionInterceptor(Zstd))
      .build()

  sendRequest("https://developers.facebook.com/docs/", client)
  sendRequest("https://www.facebook.com/robots.txt", client)
  sendRequest("https://www.instagram.com/robots.txt", client)
}

private fun sendRequest(
  url: String,
  client: OkHttpClient,
) {
  val req = Request.Builder().url(url).build()

  client.newCall(req).execute().use {

Created: Fri Dec 26 11:42:13 GMT 2025

- Last Modified: Tue Jul 29 20:01:04 GMT 2025

- 1.3K bytes

- Click Count (0)

github.com/square/okhttp

regression-test/src/androidTest/java/okhttp/regression/LetsEncryptTest.java

    }

    OkHttpClient client = builder.build();

    sendRequest(client, "https://valid-isrgrootx1.letsencrypt.org/robots.txt");

    try {
      sendRequest(client, "https://google.com/robots.txt");
      if (androidMorEarlier) {
        // will pass with default CAs on N or later
        fail();
      }
    } catch (SSLHandshakeException sslhe) {

Created: Fri Dec 26 11:42:13 GMT 2025

- Last Modified: Tue Nov 17 07:40:31 GMT 2020

- 6.1K bytes

- Click Count (0)

github.com/square/okhttp

okhttp/src/jvmTest/kotlin/okhttp3/CorrettoTest.kt

    client.newCall(request).execute().use {
      assertThat(it.protocol).isEqualTo(Protocol.HTTP_2)
      assertThat(it.handshake!!.tlsVersion).isEqualTo(TlsVersion.TLS_1_3)
    }
  }

  @Test
  @Disabled
  fun testGoogle() {
    assumeNetwork()

    val request = Request.Builder().url("https://google.com/robots.txt").build()

Created: Fri Dec 26 11:42:13 GMT 2025

- Last Modified: Fri Dec 27 13:39:56 GMT 2024

- 2.1K bytes

- Click Count (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

        // Add URLs to default set
        urlSet.add("http://example.com/robots.txt");
        urlSet.add("http://test.com/robots.txt");
        assertEquals(2, crawlerContext.getRobotsTxtUrlSet().size());

        // Set new set
        Set<String> newSet = new HashSet<>();
        newSet.add("http://new.com/robots.txt");
        crawlerContext.setRobotsTxtUrlSet(newSet);

Created: Sat Dec 20 11:21:39 GMT 2025

- Last Modified: Sat Sep 06 04:15:37 GMT 2025

- 25.6K bytes

- Click Count (0)

Search Options