Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 113 for urls (0.01 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java

            } catch (final ChildUrlsException e) {
                String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new);
                assertEquals(3, urls.length);
                assertEquals(baseUrl + "dir1/", urls[0]);
                assertEquals(baseUrl + "dir3/", urls[1]);
                assertEquals(baseUrl + "file1.txt", urls[2]);
            }
            try {
                smbClient.doGet(baseUrl + "dir1/");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 13.7K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

     * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
     * and adds them as child URLs to be crawled.
     *
     * <p>
     * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
     * It then iterates through the sitemaps in the SitemapSet, extracts the URL
     * from each sitemap, and creates a new {@link RequestData} object for each URL.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

                url = url.substring(0, idx);
            }
    
            url = url.replace("/./", "/");
    
            idx = url.indexOf(";jsessionid");
            if (idx >= 0) {
                url = url.replaceFirst(";jsessionid=[a-zA-Z0-9\\.]*", "");
            }
    
            if (url.indexOf(' ') >= 0) {
                url = url.replace(" ", "%20");
            }
    
            String oldUrl = null;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/DefaultIntervalController.java

     * and when waiting for new URLs.
     * The delays are configurable via constructor parameters.
     *
     */
    public class DefaultIntervalController extends AbstractIntervalController {
    
        /** Delay in milliseconds after processing a URL */
        protected long delayMillisAfterProcessing = 0L;
    
        /** Delay in milliseconds when no URL is in the queue */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

                    }
                }
            }
        }
    
        /**
         * Generate a path from a url.
         *
         * @param url URL
         * @return path File path
         */
        protected String getFilePath(final String url) {
            return url.replaceAll("/+", "/")
                    .replace("./", "")
                    .replace("../", "")
                    .replaceAll("/$", "/index.html")
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/UrlFilterService.java

         * @param url the URL to be excluded
         */
        void addExcludeUrlFilter(String sessionId, String url);
    
        /**
         * Adds a list of URLs to be excluded from crawling for a specific session.
         *
         * @param sessionId the ID of the session for which the URLs should be excluded
         * @param urlList the list of URLs to be excluded
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3.1K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ChildUrlsException.java

    /**
     * {@link ChildUrlsException} is thrown when child URLs are found during crawling.
     * It extends {@link CrawlerSystemException} and holds a set of {@link RequestData}
     * representing the child URLs that caused the exception.
     *
     */
    public class ChildUrlsException extends CrawlerSystemException {
    
        private static final long serialVersionUID = 1L;
    
        /**
         * The list of child URLs.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/IntervalController.java

     *   <li>{@code POST_PROCESSING} - Represents the post-processing state.</li>
     *   <li>{@code NO_URL_IN_QUEUE} - Indicates that there are no URLs in the queue.</li>
     *   <li>{@code WAIT_NEW_URL} - Indicates that the crawler is waiting for new URLs.</li>
     * </ul>
     */
    public interface IntervalController {
        /** Constant representing the pre-processing state. */
        int PRE_PROCESSING = 1;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResultData.java

            this.data = data;
        }
    
        /**
         * Add a child URL.
         * @param url the request data to add to the child URL set
         */
        public void addUrl(final RequestData url) {
            childUrlSet.add(url);
        }
    
        /**
         * Add child URLs.
         * @param c the collection of request data to add to the child URL set
         */
        public void addAllUrl(final Collection<RequestData> c) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java

            }
        }
    
        /**
         * Registers a client component with a regular expression.
         * The component will be loaded into all registered CrawlerClientFactories.
         * @param regex The regular expression to match URLs.
         * @param componentName The name of the component to register.
         */
        public synchronized void register(final String regex, final String componentName) {
            clientMap.put(regex, componentName);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
Back to top