Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 7 of 7 for RobotsTxt (0.12 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.codelibs.core.lang.StringUtil;
    
    public class RobotsTxt {
        private static final String ALL_BOTS = "*";
    
        protected final Map<Pattern, Directive> directiveMap = new LinkedHashMap<>();
    
        private final List<String> sitemapList = new ArrayList<>();
    
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 4.8K bytes
    - Viewed (0)
  2. fess-crawler-lasta/src/main/resources/crawler/robotstxt.xml

    Shinsuke Sugaya <******@****.***> 1444529815 +0900
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 367 bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            assertTrue(robotsTxt.allows("/bbb", "Crawler"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler"));
            assertTrue(robotsTxt.allows("/aaa", "Crawler/1.0"));
            assertFalse(robotsTxt.allows("/bbb", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler/1.0"));
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 5.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                final BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), charsetName));
    
                String line;
                final RobotsTxt robotsTxt = new RobotsTxt();
                final List<Directive> currentDirectiveList = new ArrayList<>();
                boolean isGroupRecodeStarted = false;
                while ((line = reader.readLine()) != null) {
                    line = stripComment(line).trim();
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 6.1K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                        }
                    }
    
                    if (httpEntity != null) {
                        final RobotsTxt robotsTxt = robotsTxtHelper.parse(httpEntity.getContent());
                        if (robotsTxt != null) {
                            final String[] sitemaps = robotsTxt.getSitemaps();
                            if (sitemaps.length > 0) {
                                crawlerContext.addSitemaps(sitemaps);
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Thu May 09 09:28:25 UTC 2024
    - 41K bytes
    - Viewed (0)
  6. fess-crawler-lasta/src/main/resources/crawler/client.xml

    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    	<include path="crawler/robotstxt.xml" />
    	<include path="crawler/contentlength.xml" />
    	<include path="crawler/mimetype.xml" />
    
    	<component name="internalHttpClient" class="org.codelibs.fess.crawler.client.http.HcHttpClient"
    		instance="prototype">
    Registered: Wed Jun 12 15:17:51 UTC 2024
    - Last Modified: Tue Aug 08 12:54:47 UTC 2023
    - 2.8K bytes
    - Viewed (0)
  7. src/cmd/go/internal/modfetch/zip_sum_test/testdata/zip_sums.csv

    github.com/tedsuo/rata,v1.0.0,h1:Sf9aZrYy6ElSTncjnGkyC2yuVvz5YJetBIUKJ4CmeKE=,f6745fd8ef8ee098410b31b1219def2c4e86c337ba6ff1319f086419b928f134
    github.com/temoto/robotstxt,v1.1.1,h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=,c37f16f826a27512b7ae683ed32be5124a0252d1c7a8c4a00fd4e27d01c563d4
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Wed Mar 18 17:29:01 UTC 2020
    - 334.9K bytes
    - Viewed (0)
Back to top