- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 45 for URLs (0.05 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java
} catch (final ChildUrlsException e) { String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new); assertEquals(3, urls.length); assertEquals(baseUrl + "dir1/", urls[0]); assertEquals(baseUrl + "dir3/", urls[1]); assertEquals(baseUrl + "file1.txt", urls[2]); } try { smbClient.doGet(baseUrl + "dir1/");
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 13.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java
import jakarta.annotation.Resource; /** * A response processor implementation that handles sitemaps. * It parses the response body as a SitemapSet, extracts URLs from the sitemaps, * and adds them as child URLs to be crawled. * * <p> * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text. * It then iterates through the sitemaps in the SitemapSet, extracts the URL
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java
} return null; } /** * Extracts URLs from HTML tag attributes using XPath. * * @param url the base URL for resolving relative URLs * @param document the document to extract URLs from * @param xpath the XPath expression to select elements * @param attr the attribute name to extract URLs from * @param encoding the character encoding to use
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 28.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/DefaultIntervalController.java
* Default implementation of the IntervalController. * This class provides a default way to manage delays between crawler operations. * It allows setting delays before processing, after processing, when no URLs are in the queue, * and when waiting for new URLs. * The delays are configurable via constructor parameters. * */ public class DefaultIntervalController extends AbstractIntervalController {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java
this.path = path; } /** * Gets the replacement string for question marks in URLs. * * @return the question mark replacement string */ public String getQuestionStr() { return questionStr; } /** * Sets the replacement string for question marks in URLs. * * @param questionStr the question mark replacement string to set */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 11.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ChildUrlsException.java
/** * {@link ChildUrlsException} is thrown when child URLs are found during crawling. * It extends {@link CrawlerSystemException} and holds a set of {@link RequestData} * representing the child URLs that caused the exception. * */ public class ChildUrlsException extends CrawlerSystemException { private static final long serialVersionUID = 1L; /** * The list of child URLs. */
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/IntervalController.java
* <li>{@code POST_PROCESSING} - Represents the post-processing state.</li> * <li>{@code NO_URL_IN_QUEUE} - Indicates that there are no URLs in the queue.</li> * <li>{@code WAIT_NEW_URL} - Indicates that the crawler is waiting for new URLs.</li> * </ul> */ public interface IntervalController { /** Constant representing the pre-processing state. */ int PRE_PROCESSING = 1;
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java
} } /** * Registers a client component with a regular expression. * The component will be loaded into all registered CrawlerClientFactories. * @param regex The regular expression to match URLs. * @param componentName The name of the component to register. */ public synchronized void register(final String regex, final String componentName) { clientMap.put(regex, componentName);
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/service/UrlFilterService.java
*/ void addIncludeUrlFilter(String sessionId, String url); /** * Adds a list of URLs to the include filter for a given session. * * @param sessionId the ID of the session for which the URLs should be included * @param urlList the list of URLs to be added to the include filter */ void addIncludeUrlFilter(String sessionId, List<String> urlList); /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java
} /** * Returns the set of robots.txt URLs. * @return The set of robots.txt URLs. */ public Set<String> getRobotsTxtUrlSet() { return robotsTxtUrlSet; } /** * Sets the set of robots.txt URLs. * @param robotsTxtUrlSet The set of robots.txt URLs. */ public void setRobotsTxtUrlSet(final Set<String> robotsTxtUrlSet) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 8.9K bytes - Viewed (0)