- Sort Score
- Result 10 results
- Languages All
Results 1 - 9 of 9 for Allow (0.01 sec)
-
fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt
User-agent: FessCrawler Disallow: # allows all User-agent: BruteBot Disallow: / Allow: /foo/bar/ Crawl-delay: 1314000 # welcome! User-agent: Googlebot Crawl-delay: 1 User-agent: * Disallow: /private/ Disallow: /help # disallows /help.html, /help/index.html, etc. Allow: /help/faq.html Crawl-delay: 3 User-agent: Crawler Disallow: /aaa User-agent: Crawler/1.0 Disallow: /bbb
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 566 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
protected static final Pattern DISALLOW_RECORD = Pattern.compile("^disallow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE); /** Pattern for parsing allow records. */ protected static final Pattern ALLOW_RECORD = Pattern.compile("^allow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE); /** Pattern for parsing crawl-delay records. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 7.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/Sitemap.java
package org.codelibs.fess.crawler.entity; import java.io.Serializable; /** * The Sitemap interface represents a sitemap entity with location and last modification date. * It extends the Serializable interface to allow sitemap objects to be serialized. */ public interface Sitemap extends Serializable { /** * Retrieves the location (URL) of the sitemap. * * @return the location of the sitemap as a String.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClient.java
import org.codelibs.fess.crawler.entity.ResponseData; /** * Interface representing a client for a web crawler. * This client is responsible for executing requests and handling responses. * It extends {@link AutoCloseable} to allow for resource management. */ public interface CrawlerClient extends AutoCloseable { /** * Sets the initialization parameters for the crawler client. *
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.8K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/suggest/concurrent/Deferred.java
* to be executed when the computation is complete. * The Promise class has then and error methods that allow you to register callbacks * for successful and unsuccessful computations, respectively. * </p> * * <p> * The Deferred class uses a CountDownLatch to allow you to wait for the computation to complete. * The resolve and reject methods decrement the CountDownLatch, allowing the getResponse method
Registered: Fri Sep 19 09:08:11 UTC 2025 - Last Modified: Fri Jul 04 14:00:23 UTC 2025 - 7.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10K bytes - Viewed (0) -
fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java
final String clusterName = UUID.randomUUID().toString(); runner.onBuild((number, settingsBuilder) -> { settingsBuilder.put("http.cors.enabled", true); settingsBuilder.put("http.cors.allow-origin", "*"); settingsBuilder.put("discovery.type", "single-node"); }).build(newConfigs().clusterName(clusterName).numOfNode(1)); // wait for yellow status runner.ensureYellow();
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 7.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
this.useRobotsTxtDisallows = useRobotsTxtDisallows; } /** * Sets whether to use robots.txt allow rules. * * @param useRobotsTxtAllows True to use allow rules, false otherwise */ public void setUseRobotsTxtAllows(final boolean useRobotsTxtAllows) { this.useRobotsTxtAllows = useRobotsTxtAllows; }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 52.2K bytes - Viewed (0) -
fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml
<!-- and a lower priority than pcap - TIKA-4174 --> <magic priority="55"> <match minShouldMatch="2"> <match value="user-agent:" type="stringignorecase" offset="0"/> <match value="allow:" type="stringignorecase" offset="0"/> <match value="disallow:" type="stringignorecase" offset="0"/> <match value="sitemap:" type="stringignorecase" offset="0"/>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Mar 13 08:18:01 UTC 2025 - 320.1K bytes - Viewed (1)