- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 49 for crawl (0.04 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java
import org.codelibs.fess.crawler.exception.RobotsTxtException; /** * Robots.txt Specifications: * <ul> * <li><a href= * "https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt" * >https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt * </a></li> * </ul> * * @author bowez * @author shinsuke * */ public class RobotsTxtHelper {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 7.7K bytes - Viewed (0) -
fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt
User-agent: FessCrawler Disallow: # allows all User-agent: BruteBot Disallow: / Allow: /foo/bar/ Crawl-delay: 1314000 # welcome! User-agent: Googlebot Crawl-delay: 1 User-agent: * Disallow: /private/ Disallow: /help # disallows /help.html, /help/index.html, etc. Allow: /help/faq.html Crawl-delay: 3 User-agent: Crawler Disallow: /aaa User-agent: Crawler/1.0 Disallow: /bbb
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 566 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java
* command. Even though search engine crawlers may consider this information * when making decisions, they may crawl pages marked "hourly" less * frequently than that, and they may crawl pages marked "yearly" more * frequently than that. Crawlers may periodically crawl pages marked * "never" so that they can handle unexpected changes to those pages. */ private String changefreq; /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.5K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/app/web/admin/fileconfig/CreateForm.java
@Size(max = 200) public String name; /** The description of the file configuration (maximum 1000 characters). */ @Size(max = 1000) public String description; /** The file paths to crawl (required, must be valid file URIs). */ @Required @UriType(protocolType = ProtocolType.FILE) @CustomSize(maxKey = "form.admin.max.input.size") public String paths;
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 5.6K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/app/web/admin/dataconfig/EditForm.java
* This form extends CreateForm to include fields necessary for updating existing data config entries, * including tracking information for optimistic locking and audit trails. * Data configs define how to crawl and extract data from databases, CSV files, and other data sources. * */ public class EditForm extends CreateForm { /** * Creates a new EditForm instance. */ public EditForm() {
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 2.3K bytes - Viewed (0) -
src/main/webapp/WEB-INF/view/admin/relatedquery/admin_relatedquery_edit.jsp
<div class="wrapper"> <jsp:include page="/WEB-INF/view/common/admin/header.jsp"></jsp:include> <jsp:include page="/WEB-INF/view/common/admin/sidebar.jsp"> <jsp:param name="menuCategoryType" value="crawl"/> <jsp:param name="menuType" value="relatedQuery"/> </jsp:include> <div class="content-wrapper"> <div class="content-header"> <div class="container-fluid">
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Feb 13 07:47:04 UTC 2020 - 4.8K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/app/web/api/admin/webconfig/SearchBody.java
/** * Default constructor. */ public SearchBody() { super(); } /** Name of the web crawling configuration */ public String name; /** URLs to crawl */ public String urls; /** Description of the web crawling configuration */ public String description;
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 1.2K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/app/web/api/admin/fileconfig/SearchBody.java
/** * Default constructor. */ public SearchBody() { super(); } /** Name of the file crawling configuration */ public String name; /** File paths to crawl */ public String paths; /** Description of the file crawling configuration */ public String description;
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 1.2K bytes - Viewed (0) -
src/main/webapp/WEB-INF/view/admin/pathmap/admin_pathmap_edit.jsp
<div class="wrapper"> <jsp:include page="/WEB-INF/view/common/admin/header.jsp"></jsp:include> <jsp:include page="/WEB-INF/view/common/admin/sidebar.jsp"> <jsp:param name="menuCategoryType" value="crawl"/> <jsp:param name="menuType" value="pathMapping"/> </jsp:include> <div class="content-wrapper"> <div class="content-header"> <div class="container-fluid">
Registered: Thu Sep 04 12:52:25 UTC 2025 - Last Modified: Sun Feb 28 06:09:47 UTC 2021 - 7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResultData.java
import java.util.LinkedHashSet; import java.util.Set; import java.util.function.Function; import org.codelibs.fess.crawler.exception.CrawlerSystemException; /** * This class represents the result data of a crawl. */ public class ResultData implements Serializable { private static final long serialVersionUID = 1L; /** The name of the transformer. */ protected String transformerName;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.7K bytes - Viewed (0)