Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 91 - 100 of 105 for metodo (0.15 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CrawlingParameterUtil.java

    import org.codelibs.fess.crawler.service.DataService;
    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    /**
     * Utility class for managing crawling parameters using ThreadLocal variables.
     * This class provides methods to set and get various parameters related to the crawling process.
     *
     * <p>This class is final and cannot be instantiated.</p>
     *
     * <p>The following parameters are managed:</p>
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.4K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImpl.java

    /**
     * MimeTypeHelperImpl is a helper class that detects the MIME type of a given input stream or filename.
     * It uses the Apache Tika library to detect the MIME type.
     *
     * <p>
     * This class provides methods to:
     * </p>
     * <ul>
     *   <li>Detect the MIME type based on the input stream and filename.</li>
     *   <li>Normalize the filename to handle special characters.</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/RegexRule.java

     * only one of them (allRequired = false). It also supports a default rule that always matches.
     *
     * <p>
     * The class uses a map of field names to Pattern objects to store the regular expressions.
     * The match method extracts the values of the specified fields from the ResponseData and
     * applies the corresponding regular expressions.
     * </p>
     *
     * <p>
     * Example usage:
     * </p>
     *
     * <pre>
     * {@code
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.2K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

    /**
     * The {@link CrawlerContext} class holds the context information for a crawler execution.
     * It contains various attributes related to the crawler's state, configuration, and runtime data.
     * This class provides methods to access and modify these attributes, allowing for control and monitoring
     * of the crawler's behavior.
     *
     * <p>
     * The context includes information such as the session ID, active thread count, access count, crawler status,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlQueueServiceImpl.java

    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlQueueService} interface.
     * This class provides methods for managing a queue of URLs to be crawled,
     * including adding, deleting, and retrieving URLs from the queue.
     * It uses a {@link MemoryDataHelper} to store the URL queue data in memory.
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java

            assertContentType("application/zip", "extractor/zip/test.zip", "hoge.zip");
            assertContentType("application/x-lharc", "extractor/lha/test.lzh", "hoge.lzh"); // TODO is it correct?
    
            assertContentType("application/xml", "extractor/test.mm", "hoge.mm");
    
            assertContentType("message/rfc822", "extractor/eml/sample1.eml", "sample1.eml");
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 11.6K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

     *   <li>Extracting child URLs from the HTML content based on configured rules.</li>
     *   <li>Handling redirect URLs specified in the response headers.</li>
     * </ol>
     * <p>
     * The class also provides methods for configuring features and properties of the
     * underlying DOM parser, as well as defining rules for extracting child URLs
     * from specific HTML tags and attributes.
     * </p>
     *
     * <p>
     * <b>Configuration:</b>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

            if (directive == null) {
                return 0;
            }
            return directive.getCrawlDelay();
        }
    
        /**
         * Returns the most specific directive matching the given user agent.
         * The method finds the longest matching user agent pattern in the directives,
         * excluding the general "*" pattern which matches all bots.
         *
         * @param userAgent the user agent string to match against directives,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/DefaultResponseProcessor.java

     * and {@link UrlQueue}. It also uses {@link CrawlingParameterUtil} to access services
     * like {@link UrlQueueService} and DataService, as well as the {@link CrawlerContext}.
     * </p>
     *
     * <p>
     * The class provides methods to check if a response is successful or not modified based on
     * configured HTTP status codes. It also handles the storage of child URLs found in the
     * response data, respecting the maximum depth and access count limits.
     * </p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.5K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JodExtractorTest.java

            String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
        /*
         * TODO not work... public void test_getText_mswordx_as() { InputStream in =
         * ResourceUtil.getResourceAsStream("extractor/msoffice/test_as.docx");
         * Map<String, String> params = new HashMap<String, String>();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 9.5K bytes
    - Viewed (0)
Back to top