- Sort Score
- Result 10 results
- Languages All
Results 1 - 5 of 5 for crawl (0.01 sec)
-
fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt
User-agent: FessCrawler Disallow: # allows all User-agent: BruteBot Disallow: / Allow: /foo/bar/ Crawl-delay: 1314000 # welcome! User-agent: Googlebot Crawl-delay: 1 User-agent: * Disallow: /private/ Disallow: /help # disallows /help.html, /help/index.html, etc. Allow: /help/faq.html Crawl-delay: 3 User-agent: Crawler Disallow: /aaa User-agent: Crawler/1.0 Disallow: /bbb
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 566 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
import org.codelibs.fess.crawler.exception.RobotsTxtException; /** * Robots.txt Specifications: * <ul> * <li><a href= * "https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt" * >https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt * </a></li> * </ul> * * @author bowez * @author shinsuke * */ public class RobotsTxtHelper {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 7.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java
* command. Even though search engine crawlers may consider this information * when making decisions, they may crawl pages marked "hourly" less * frequently than that, and they may crawl pages marked "yearly" more * frequently than that. Crawlers may periodically crawl pages marked * "never" so that they can handle unexpected changes to those pages. */ private String changefreq; /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResultData.java
import java.util.LinkedHashSet; import java.util.Set; import java.util.function.Function; import org.codelibs.fess.crawler.exception.CrawlerSystemException; /** * This class represents the result data of a crawl. */ public class ResultData implements Serializable { private static final long serialVersionUID = 1L; /** The name of the transformer. */ protected String transformerName;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.7K bytes - Viewed (0) -
fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/entity/OpenSearchAccessResult.java
*/ private boolean initializedData = false; /** * Initializes the access result with response data and result data. * * @param responseData The response data from the crawl operation. * @param resultData The result data from content processing. */ @Override public void init(final ResponseData responseData, final ResultData resultData) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.5K bytes - Viewed (0)