Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 5 of 5 for RobotsTxt (0.1 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Pattern;
    
    import org.codelibs.core.lang.StringUtil;
    
    public class RobotsTxt {
        private static final String ALL_BOTS = "*";
    
        protected final Map<Pattern, Directive> directiveMap = new LinkedHashMap<>();
    
        private final List<String> sitemapList = new ArrayList<>();
    
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 4.8K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            assertTrue(robotsTxt.allows("/bbb", "Crawler"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler"));
            assertTrue(robotsTxt.allows("/aaa", "Crawler/1.0"));
            assertFalse(robotsTxt.allows("/bbb", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ccc", "Crawler/1.0"));
            assertTrue(robotsTxt.allows("/ddd", "Crawler/1.0"));
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu Feb 22 01:36:27 UTC 2024
    - 5.9K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                final BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), charsetName));
    
                String line;
                final RobotsTxt robotsTxt = new RobotsTxt();
                final List<Directive> currentDirectiveList = new ArrayList<>();
                boolean isGroupRecodeStarted = false;
                while ((line = reader.readLine()) != null) {
                    line = stripComment(line).trim();
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Sat Oct 12 01:40:57 UTC 2024
    - 6.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                        }
                    }
    
                    if (httpEntity != null) {
                        final RobotsTxt robotsTxt = robotsTxtHelper.parse(httpEntity.getContent());
                        if (robotsTxt != null) {
                            final String[] sitemaps = robotsTxt.getSitemaps();
                            if (sitemaps.length > 0) {
                                crawlerContext.addSitemaps(sitemaps);
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Thu May 09 09:29:26 UTC 2024
    - 41K bytes
    - Viewed (0)
  5. fess-crawler-lasta/src/main/resources/crawler/client.xml

    <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    	"http://dbflute.org/meta/lastadi10.dtd">
    <components namespace="fessCrawler">
    	<include path="crawler/container.xml" />
    	<include path="crawler/robotstxt.xml" />
    	<include path="crawler/contentlength.xml" />
    	<include path="crawler/mimetype.xml" />
    
    	<component name="internalHttpClient" class="org.codelibs.fess.crawler.client.http.HcHttpClient"
    		instance="prototype">
    Registered: Sun Nov 10 03:50:12 UTC 2024
    - Last Modified: Tue Aug 08 12:54:47 UTC 2023
    - 2.8K bytes
    - Viewed (0)
Back to top