Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 16 for Follow (0.03 sec)

  1. fess-crawler/src/test/resources/extractor/eml/sample2.eml

    </tr>
    <!-- IF TEXT-ONLY bottom padding -->
    <tr>
    <td height="28" style="padding:0;margin:0;line-height:1px;font-size:1px;"></td>
    </tr>
    <!-- end bottom padding for a text-only tweet-->
    <!-- IF USER DOES NOT FOLLOW ACCOUNT else -->
    <tr>
    <td colspan="2" style="padding:0;margin:0;line-height:1px;font-size:1px;">
    <table border="0" cellpadding="0" cellspacing="0" align="left" style="padding:0;margin:0;line-height:1px;font-size:1px;">
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Jan 16 07:50:35 UTC 2016
    - 91.6K bytes
    - Viewed (0)
  2. README.md

        System.out.println("Processed: " + processed + " URLs");
        Thread.sleep(5000);
    }
    ```
    
    ## Contributing
    
    We welcome contributions to Fess Crawler! Please follow these guidelines:
    
    1. **Fork** the repository
    2. **Create** a feature branch (`git checkout -b feature/amazing-feature`)
    3. **Commit** your changes (`git commit -m 'Add amazing feature'`)
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  3. README.md

    ```java
    // Add to your logging configuration
    logger.debug.org.codelibs.fess.suggest=DEBUG
    ```
    
    ## Contributing
    
    1. Fork the repository
    2. Create a feature branch: `git checkout -b feature/amazing-feature`
    3. Follow coding standards: `mvn formatter:format`
    4. Add tests for new functionality
    5. Ensure all tests pass: `mvn test`
    6. Check license headers: `mvn license:format`
    7. Commit changes: `git commit -m 'Add amazing feature'`
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Sun Aug 31 03:31:14 UTC 2025
    - 12.1K bytes
    - Viewed (1)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

        /** Default character encoding to use when none is specified. */
        protected String defaultEncoding;
    
        /** Number of bytes to read from input stream to determine character set encoding.
         * If you want to follow a html spec, use 512. */
        protected int preloadSizeForCharset = 2048;
    
        /**
         * Pattern for invalid URLs.
         */
        protected Pattern invalidUrlPattern = Pattern.compile("^\\s*javascript:|" //
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  5. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/CrawlerTest.java

                settingsBuilder.put("http.cors.enabled", true);
                settingsBuilder.put("http.cors.allow-origin", "*");
                settingsBuilder.put("discovery.type", "single-node");
            }).build(newConfigs().clusterName(clusterName).numOfNode(1));
    
            // wait for yellow status
            runner.ensureYellow();
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  6. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt

    User-agent: FessCrawler
    Disallow:           # allows all 
    
    User-agent: BruteBot
    Disallow: /
    Allow: /foo/bar/
    Crawl-delay: 1314000
    
    # welcome!
    User-agent: Googlebot
    Crawl-delay: 1
    
    User-agent: *
    Disallow: /private/
    Disallow: /help        # disallows /help.html, /help/index.html, etc.
    Allow: /help/faq.html
    Crawl-delay: 3
    
    User-agent: Crawler
    Disallow: /aaa
    
    User-agent: Crawler/1.0
    Disallow: /bbb
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 566 bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/Sitemap.java

    package org.codelibs.fess.crawler.entity;
    
    import java.io.Serializable;
    
    /**
     * The Sitemap interface represents a sitemap entity with location and last modification date.
     * It extends the Serializable interface to allow sitemap objects to be serialized.
     */
    public interface Sitemap extends Serializable {
    
        /**
         * Retrieves the location (URL) of the sitemap.
         *
         * @return the location of the sitemap as a String.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.2K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

        protected static final Pattern DISALLOW_RECORD = Pattern.compile("^disallow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing allow records. */
        protected static final Pattern ALLOW_RECORD = Pattern.compile("^allow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing crawl-delay records. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  9. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/service/impl/OpenSearchUrlQueueServiceTest.java

                settingsBuilder.put("http.cors.enabled", true);
                settingsBuilder.put("discovery.type", "single-node");
            }).build(newConfigs().clusterName(clusterName).numOfNode(1));
    
            // wait for yellow status
            runner.ensureYellow();
    
            System.setProperty(FesenClient.HTTP_ADDRESS, "localhost:" + runner.node().settings().get("http.port", "9201"));
    
            super.setUp();
        }
    
        @Override
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 5.8K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClient.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    
    /**
     * Interface representing a client for a web crawler.
     * This client is responsible for executing requests and handling responses.
     * It extends {@link AutoCloseable} to allow for resource management.
     */
    public interface CrawlerClient extends AutoCloseable {
    
        /**
         * Sets the initialization parameters for the crawler client.
         *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.8K bytes
    - Viewed (0)
Back to top