- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 113 for urls (0.01 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java
} catch (final ChildUrlsException e) { String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new); assertEquals(3, urls.length); assertEquals(baseUrl + "dir1/", urls[0]); assertEquals(baseUrl + "dir3/", urls[1]); assertEquals(baseUrl + "file1.txt", urls[2]); } try { smbClient.doGet(baseUrl + "dir1/");
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 13.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java
* It parses the response body as a SitemapSet, extracts URLs from the sitemaps, * and adds them as child URLs to be crawled. * * <p> * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text. * It then iterates through the sitemaps in the SitemapSet, extracts the URL * from each sitemap, and creates a new {@link RequestData} object for each URL.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java
url = url.substring(0, idx); } url = url.replace("/./", "/"); idx = url.indexOf(";jsessionid"); if (idx >= 0) { url = url.replaceFirst(";jsessionid=[a-zA-Z0-9\\.]*", ""); } if (url.indexOf(' ') >= 0) { url = url.replace(" ", "%20"); } String oldUrl = null;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 28.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/DefaultIntervalController.java
* and when waiting for new URLs. * The delays are configurable via constructor parameters. * */ public class DefaultIntervalController extends AbstractIntervalController { /** Delay in milliseconds after processing a URL */ protected long delayMillisAfterProcessing = 0L; /** Delay in milliseconds when no URL is in the queue */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java
} } } } /** * Generate a path from a url. * * @param url URL * @return path File path */ protected String getFilePath(final String url) { return url.replaceAll("/+", "/") .replace("./", "") .replace("../", "") .replaceAll("/$", "/index.html")
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 11.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/service/UrlFilterService.java
* @param url the URL to be excluded */ void addExcludeUrlFilter(String sessionId, String url); /** * Adds a list of URLs to be excluded from crawling for a specific session. * * @param sessionId the ID of the session for which the URLs should be excluded * @param urlList the list of URLs to be excluded */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ChildUrlsException.java
/** * {@link ChildUrlsException} is thrown when child URLs are found during crawling. * It extends {@link CrawlerSystemException} and holds a set of {@link RequestData} * representing the child URLs that caused the exception. * */ public class ChildUrlsException extends CrawlerSystemException { private static final long serialVersionUID = 1L; /** * The list of child URLs. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/IntervalController.java
* <li>{@code POST_PROCESSING} - Represents the post-processing state.</li> * <li>{@code NO_URL_IN_QUEUE} - Indicates that there are no URLs in the queue.</li> * <li>{@code WAIT_NEW_URL} - Indicates that the crawler is waiting for new URLs.</li> * </ul> */ public interface IntervalController { /** Constant representing the pre-processing state. */ int PRE_PROCESSING = 1;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResultData.java
this.data = data; } /** * Add a child URL. * @param url the request data to add to the child URL set */ public void addUrl(final RequestData url) { childUrlSet.add(url); } /** * Add child URLs. * @param c the collection of request data to add to the child URL set */ public void addAllUrl(final Collection<RequestData> c) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java
} } /** * Registers a client component with a regular expression. * The component will be loaded into all registered CrawlerClientFactories. * @param regex The regular expression to match URLs. * @param componentName The name of the component to register. */ public synchronized void register(final String regex, final String componentName) { clientMap.put(regex, componentName);
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.5K bytes - Viewed (0)