Search Options

Results per page
Sort
Preferred Languages
Advance

Results 111 - 120 of 172 for Crawling (0.42 sec)

  1. src/main/java/org/codelibs/fess/app/web/api/admin/fileauth/ApiAdminFileauthAction.java

     * Provides RESTful API endpoints for managing file authentication settings in the Fess search engine.
     * File authentication settings define access credentials and permissions for file-based crawling.
     *
     */
    public class ApiAdminFileauthAction extends FessApiAdminAction {
    
        private static final Logger logger = LogManager.getLogger(ApiAdminFileauthAction.class);
    
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 9.2K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/app/web/base/FessAdminAction.java

        //                                                                           Attribute
        //                                                                           =========
        /** Helper for crawling configuration management. */
        @Resource
        protected CrawlingConfigHelper crawlingConfigHelper;
    
        // ===================================================================================
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Nov 27 07:01:25 UTC 2025
    - 9.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/builder/RequestDataBuilderTest.java

            RequestData data = RequestDataBuilder.newRequestData().url(null).build();
    
            assertNull(data.getUrl());
        }
    
        public void test_realWorldUsageExample1() {
            // Real-world example: crawling a web page
            RequestData data = RequestDataBuilder.newRequestData().get().url("https://example.com/article/12345").weight(1.0f).build();
    
            assertNotNull(data);
            assertEquals(Method.GET, data.getMethod());
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 10.9K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/interval/impl/HostIntervalControllerTest.java

    import org.dbflute.utflute.core.PlainTestCase;
    
    /**
     * @author hayato
     *
     */
    public class HostIntervalControllerTest extends PlainTestCase {
    
        /**
         * Test that crawling intervals for the same host work correctly.
         */
        public void test_delayBeforeProcessing() {
            // Number of concurrent tasks
            final int numTasks = 100;
            // Interval in milliseconds
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  5. ADDING_NEW_LANGUAGE.md

    2. **Browser header**: `Accept-Language` header
    3. **Fallback**: English (from `fess_label.properties` and `fess_message.properties`)
    
    ### Document Language Detection
    
    During crawling and indexing, Fess:
    
    1. Detects language from document content using Apache Tika
    2. Validates against `supported.languages` list
    3. Creates language-specific fields (e.g., `content_ja`, `title_en`, `content_sv`)
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Nov 06 11:36:30 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  6. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/service/impl/OpenSearchUrlQueueServiceTest.java

            final String sessionId = "poll_session5";
            final int maxSize = 5;
            urlQueueService.setMaxCrawlingQueueSize(maxSize);
    
            // Insert more items than max crawling queue size
            final List<OpenSearchUrlQueue> urlQueueList = new ArrayList<>();
            for (int i = 1; i <= maxSize + 10; i++) {
                final OpenSearchUrlQueue urlQueue = new OpenSearchUrlQueue();
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 20 08:40:57 UTC 2025
    - 14.3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * </ul>
     *
     * <p>References:</p>
     * <ul>
     * <li><a href="https://datatracker.ietf.org/doc/html/rfc9309">RFC 9309 - Robots Exclusion Protocol</a></li>
     * <li><a href="https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt">
     * Google's robots.txt Specification</a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    import jakarta.annotation.Resource;
    
    /**
     * The {@code HtmlTransformer} class is responsible for transforming HTML responses
     * during the crawling process. It extracts data, identifies child URLs, and handles
     * character set encoding.
     * <p>
     * This class extends {@link AbstractTransformer} and utilizes various helper classes
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Nov 29 07:42:33 UTC 2025
    - 30.5K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            transformer.addTransformationRule("<[^>]+>", ""); // Remove HTML tags
            transformer.addTransformationRule("\\s+", " "); // Normalize whitespace
    
            // Simulate crawling response
            ResponseData responseData = new ResponseData();
            responseData.setUrl("http://example.com/page.html");
            responseData.setParentUrl("http://example.com/");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

            }
            return false;
        }
    
        /**
         * Create the parameters for extraction.
         * @param responseData The response data.
         * @param crawlingConfig The crawling configuration.
         * @return The parameters for extraction.
         */
        protected Map<String, String> createExtractParams(final ResponseData responseData, final CrawlingConfig crawlingConfig) {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 25.7K bytes
    - Viewed (0)
Back to top