Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 14 for protocols (0.05 sec)

  1. README.md

    crawler.urlFilter.addInclude(".*\\.pdf$");
    
    // Exclude patterns  
    crawler.urlFilter.addExclude(".*\\.js$");
    crawler.urlFilter.addExclude(".*login.*");
    ```
    
    ## Supported Protocols and Formats
    
    ### Protocols
    - **HTTP/HTTPS**: Full web crawling support with cookies, authentication, redirects
    - **File System**: Local and network file system access
    - **FTP**: FTP server crawling with authentication
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            assertTrue(urlFilter.match("https://example.com/日本/page"));
            assertFalse(urlFilter.match("https://example.com/china/page"));
        }
    
        /**
         * Test URL with special protocols
         */
        public void test_specialProtocols() {
            String sessionId = "test-session-022";
            urlFilter.init(sessionId);
    
            urlFilter.addInclude("(http|https|ftp|file)://.*");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/container/CrawlerContainer.java

            final String value = System.getProperty("java.protocol.handler.pkgs");
            if (StringUtil.isEmpty(value)) {
                buf.append("org.codelibs.fess.net.protocol");
            } else if (!value.contains("org.codelibs.fess.net.protocol")) {
                buf.append("|org.codelibs.fess.net.protocol");
            }
            if (buf.length() > 0) {
                System.setProperty("java.protocol.handler.pkgs", buf.toString());
            }
        }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/net/protocol/storage/Handler.java

    import io.minio.errors.ServerException;
    import io.minio.errors.XmlParserException;
    
    /**
     * Handler for the "storage" protocol, allowing access to objects stored in a MinIO-compatible storage service.
     * This handler extends {@link URLStreamHandler} to provide a way to open connections to storage objects
     * using URLs with the "storage" protocol.
     *
     * <p>
     * The URL format is expected to be: {@code storage://bucketName/objectName}.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.5K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/net/protocol/storage/HandlerTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.net.protocol.storage;
    
    import java.net.URL;
    
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    import org.dbflute.utflute.core.PlainTestCase;
    
    public class HandlerTest extends PlainTestCase {
        @Override
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.1K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/ntlm/NTLMSchemeProvider.java

     */
    package org.codelibs.fess.crawler.client.http.ntlm;
    
    import org.apache.http.auth.AuthScheme;
    import org.apache.http.auth.AuthSchemeProvider;
    import org.apache.http.impl.auth.NTLMScheme;
    import org.apache.http.protocol.HttpContext;
    
    /**
     * This class is an AuthSchemeProvider implementation for NTLM.
     *
     * @author shinsuke
     *
     */
    public class NTLMSchemeProvider implements AuthSchemeProvider {
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.4K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/smb1/SmbClient.java

    import org.codelibs.jcifs.smb1.util.LogStream;
    
    /**
     * The {@link SmbClient} class is a crawler client implementation for accessing files and directories
     * on SMB (Server Message Block) shares using the SMB1 protocol. It extends {@link AbstractCrawlerClient} and utilizes the JCIFS library
     * to interact with SMB resources.
     *
     * <p>
     * This client supports authentication, content retrieval, and metadata extraction from SMB files.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Sep 18 09:30:45 UTC 2025
    - 23K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/storage/StorageClient.java

    import jakarta.annotation.Resource;
    
    /**
     * A crawler client implementation for accessing and retrieving content from storage systems using MinIO.
     * This client supports operations on object storage systems compatible with S3 protocol.
     *
     * <p>This client requires the following initialization parameters:
     * <ul>
     *   <li>endpoint - The URL of the MinIO server</li>
     *   <li>accessKey - The access key for authentication</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 17.9K bytes
    - Viewed (2)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

        /**
         * Creates a new SitemapUrl instance.
         */
        public SitemapUrl() {
            super();
        }
    
        /**
         * URL of the page. This URL must begin with the protocol (such as http) and
         * end with a trailing slash, if your web server requires it. This value
         * must be less than 2,048 characters.
         */
        private String loc;
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     * This class manages the rules defined in a robots.txt file, including user agent directives,
     * allowed/disallowed paths, crawl delays, and sitemap URLs.
     *
     * <p>The robots.txt protocol is implemented according to the standard specification,
     * supporting pattern matching for user agents, path-based access control, and crawl delay settings.</p>
     *
     * <p>Key features:</p>
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
Back to top