crawl - Code Search

misc/linkcheck/linkcheck.go

		}
		dest := *root + ref
		linkSources[dest] = append(linkSources[dest], url)
		crawl(dest, url)
	}
	for _, id := range pageIDs(body) {
		if *verbose {
			log.Printf(" url %s has #%s", url, id)
		}
		fragExists[urlFrag{url, id}] = true
	}
	return nil
}

func main() {
	flag.Parse()

	go crawlLoop()
	crawl(*root, "")

	wg.Wait()
	close(urlq)
	for uf, needers := range neededFrags {

Registered: Tue Nov 05 11:13:11 UTC 2024

- Last Modified: Wed Oct 06 15:53:04 UTC 2021

- 3.9K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

import org.codelibs.fess.crawler.exception.RobotsTxtException;

/**
 * Robots.txt Specifications:
 * <ul>
 * <li><a href=
 * "https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt"
 * >https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
 * </a></li>
 * </ul>
 *
 * @author bowez
 * @author shinsuke
 *
 */
public class RobotsTxtHelper {

Registered: Sun Nov 10 03:50:12 UTC 2024

- Last Modified: Sat Oct 12 01:40:57 UTC 2024

- 6.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt

User-agent: FessCrawler
Disallow:           # allows all 

User-agent: BruteBot
Disallow: /
Allow: /foo/bar/
Crawl-delay: 1314000

# welcome!
User-agent: Googlebot
Crawl-delay: 1

User-agent: *
Disallow: /private/
Disallow: /help        # disallows /help.html, /help/index.html, etc.
Allow: /help/faq.html
Crawl-delay: 3

User-agent: Crawler
Disallow: /aaa

User-agent: Crawler/1.0
Disallow: /bbb

Registered: Sun Nov 10 03:50:12 UTC 2024

- Last Modified: Sun Oct 11 02:16:55 UTC 2015

- 566 bytes

- Viewed (0)

github.com/codelibs/fess

src/main/java/org/codelibs/fess/exec/Crawler.java

                webFsCrawlerThread = new Thread((Runnable) () -> {
                    // crawl web
                    writeTimeToSessionInfo(crawlingInfoHelper, Constants.WEB_FS_CRAWLER_START_TIME);
                    webFsIndexHelper.crawl(options.sessionId, webConfigIdList, fileConfigIdList);
                    writeTimeToSessionInfo(crawlingInfoHelper, Constants.WEB_FS_CRAWLER_END_TIME);

Registered: Thu Oct 31 13:40:30 UTC 2024

- Last Modified: Fri Oct 11 21:20:39 UTC 2024

- 24K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

     * command. Even though search engine crawlers may consider this information
     * when making decisions, they may crawl pages marked "hourly" less
     * frequently than that, and they may crawl pages marked "yearly" more
     * frequently than that. Crawlers may periodically crawl pages marked
     * "never" so that they can handle unexpected changes to those pages.
     */
    private String changefreq;

    /**

Registered: Sun Nov 10 03:50:12 UTC 2024

- Last Modified: Thu Feb 22 01:36:27 UTC 2024

- 4.9K bytes

- Viewed (0)

github.com/gradle/gradle

build-logic-commons/code-quality-rules/src/main/resources/checkstyle/checkstyle-api.xml

  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
  -->
<!DOCTYPE module PUBLIC
        "-//Puppy Crawl//DTD Check Configuration 1.2//EN"
        "http://www.puppycrawl.com/dtds/configuration_1_2.dtd">
<module name="Checker">
    <module name="SuppressionFilter">
        <property name="file" value="${config_loc}/suppressions.xml"/>

Registered: Wed Nov 06 11:36:14 UTC 2024

- Last Modified: Thu Nov 17 23:20:14 UTC 2022

- 1.6K bytes

- Viewed (0)

github.com/codelibs/fess

src/main/resources/fess_label_de.properties

labels.crawling_info_CrawlerExecTime=Crawler Ausführungsdauer
labels.crawling_info_CrawlerStatus=Crawler-Status
labels.crawling_info_WebFsCrawlExecTime=Crawl Ausführungsdauer (Web/Dateisystem)
labels.crawling_info_WebFsCrawlStartTime=Crawl Startzeit (Web/Dateisystem)
labels.crawling_info_WebFsCrawlEndTime=Crawl Endzeit (Web/Dateisystem)
labels.crawling_info_WebFsIndexExecTime=Indizierung Ausführungsdauer (Web/Dateisystem)

Registered: Thu Oct 31 13:40:30 UTC 2024

- Last Modified: Fri Mar 22 11:58:34 UTC 2024

- 42.8K bytes

- Viewed (0)

github.com/codelibs/fess

src/main/java/org/codelibs/fess/helper/DataIndexHelper.java

    protected int crawlerPriority = Thread.NORM_PRIORITY;

    protected final List<DataCrawlingThread> dataCrawlingThreadList = Collections.synchronizedList(new ArrayList<>());

    public void crawl(final String sessionId) {
        final List<DataConfig> configList = ComponentUtil.getCrawlingConfigHelper().getAllDataConfigList();

        if (configList.isEmpty()) {
            // nothing

Registered: Thu Oct 31 13:40:30 UTC 2024

- Last Modified: Thu Feb 22 01:37:57 UTC 2024

- 12K bytes

- Viewed (0)

github.com/codelibs/fess

src/main/webapp/WEB-INF/view/admin/relatedquery/admin_relatedquery_edit.jsp

<div class="wrapper">
    <jsp:include page="/WEB-INF/view/common/admin/header.jsp"></jsp:include>
    <jsp:include page="/WEB-INF/view/common/admin/sidebar.jsp">
        <jsp:param name="menuCategoryType" value="crawl"/>
        <jsp:param name="menuType" value="relatedQuery"/>
    </jsp:include>
    <div class="content-wrapper">
        <div class="content-header">
            <div class="container-fluid">

Registered: Thu Oct 31 13:40:30 UTC 2024

- Last Modified: Thu Feb 13 07:47:04 UTC 2020

- 4.8K bytes

- Viewed (0)

github.com/codelibs/fess

src/main/resources/fess_label_en.properties

labels.crawling_info_WebFsCrawlExecTime=Crawl exec time (Web/File system)
labels.crawling_info_WebFsCrawlStartTime=Crawl start time (Web/File system)
labels.crawling_info_WebFsCrawlEndTime=Crawl end time (Web/File system)
labels.crawling_info_WebFsIndexExecTime=Indexing exec time (Web/File system)
labels.crawling_info_WebFsIndexSize=Index size (Web/File system)
labels.crawling_info_DataCrawlExecTime=Crawl exec time (Data store)

Registered: Thu Oct 31 13:40:30 UTC 2024

- Last Modified: Fri Mar 22 11:58:34 UTC 2024

- 40.7K bytes

- Viewed (0)

Search Options