Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 45 for URLs (0.05 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java

            } catch (final ChildUrlsException e) {
                String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new);
                assertEquals(3, urls.length);
                assertEquals(baseUrl + "dir1/", urls[0]);
                assertEquals(baseUrl + "dir3/", urls[1]);
                assertEquals(baseUrl + "file1.txt", urls[2]);
            }
            try {
                smbClient.doGet(baseUrl + "dir1/");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 13.7K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

    import jakarta.annotation.Resource;
    
    /**
     * A response processor implementation that handles sitemaps.
     * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
     * and adds them as child URLs to be crawled.
     *
     * <p>
     * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
     * It then iterates through the sitemaps in the SitemapSet, extracts the URL
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

            }
            return null;
        }
    
        /**
         * Extracts URLs from HTML tag attributes using XPath.
         *
         * @param url the base URL for resolving relative URLs
         * @param document the document to extract URLs from
         * @param xpath the XPath expression to select elements
         * @param attr the attribute name to extract URLs from
         * @param encoding the character encoding to use
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/DefaultIntervalController.java

     * Default implementation of the IntervalController.
     * This class provides a default way to manage delays between crawler operations.
     * It allows setting delays before processing, after processing, when no URLs are in the queue,
     * and when waiting for new URLs.
     * The delays are configurable via constructor parameters.
     *
     */
    public class DefaultIntervalController extends AbstractIntervalController {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

            this.path = path;
        }
    
        /**
         * Gets the replacement string for question marks in URLs.
         *
         * @return the question mark replacement string
         */
        public String getQuestionStr() {
            return questionStr;
        }
    
        /**
         * Sets the replacement string for question marks in URLs.
         *
         * @param questionStr the question mark replacement string to set
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ChildUrlsException.java

    /**
     * {@link ChildUrlsException} is thrown when child URLs are found during crawling.
     * It extends {@link CrawlerSystemException} and holds a set of {@link RequestData}
     * representing the child URLs that caused the exception.
     *
     */
    public class ChildUrlsException extends CrawlerSystemException {
    
        private static final long serialVersionUID = 1L;
    
        /**
         * The list of child URLs.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/IntervalController.java

     *   <li>{@code POST_PROCESSING} - Represents the post-processing state.</li>
     *   <li>{@code NO_URL_IN_QUEUE} - Indicates that there are no URLs in the queue.</li>
     *   <li>{@code WAIT_NEW_URL} - Indicates that the crawler is waiting for new URLs.</li>
     * </ul>
     */
    public interface IntervalController {
        /** Constant representing the pre-processing state. */
        int PRE_PROCESSING = 1;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java

            }
        }
    
        /**
         * Registers a client component with a regular expression.
         * The component will be loaded into all registered CrawlerClientFactories.
         * @param regex The regular expression to match URLs.
         * @param componentName The name of the component to register.
         */
        public synchronized void register(final String regex, final String componentName) {
            clientMap.put(regex, componentName);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/UrlFilterService.java

         */
        void addIncludeUrlFilter(String sessionId, String url);
    
        /**
         * Adds a list of URLs to the include filter for a given session.
         *
         * @param sessionId the ID of the session for which the URLs should be included
         * @param urlList the list of URLs to be added to the include filter
         */
        void addIncludeUrlFilter(String sessionId, List<String> urlList);
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3.1K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        }
    
        /**
         * Returns the set of robots.txt URLs.
         * @return The set of robots.txt URLs.
         */
        public Set<String> getRobotsTxtUrlSet() {
            return robotsTxtUrlSet;
        }
    
        /**
         * Sets the set of robots.txt URLs.
         * @param robotsTxtUrlSet The set of robots.txt URLs.
         */
        public void setRobotsTxtUrlSet(final Set<String> robotsTxtUrlSet) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
Back to top