- Sort Score
- Result 10 results
- Languages All
Results 1 - 10 of 17 for duplicates (0.15 sec)
-
fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/service/impl/OpenSearchUrlQueueServiceTest.java
urlQueue2.setMethod("GET"); urlQueue2.setSessionId(sessionId); urlQueue2.setUrl("http://www.example.com/page1"); // Duplicate urlQueueList.add(urlQueue2); urlQueueService.offerAll(sessionId, urlQueueList); // Should only store one item (duplicates are filtered) int count = 0; while (true) { final OpenSearchUrlQueue polled = urlQueueService.poll(sessionId);
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Nov 20 08:40:57 UTC 2025 - 14.3K bytes - Viewed (0) -
src/test/java/org/codelibs/fess/suggest/util/SuggestUtilTest.java
assertNotNull(invalidKeywords); assertEquals(0, invalidKeywords.size()); } @Test public void testGetKeywordsWithDuplicates() { // Test that duplicates are removed String query = "test test query"; String[] fields = { "content" }; List<String> keywords = SuggestUtil.getKeywords(query, fields); assertNotNull(keywords);
Registered: Sat Dec 20 13:04:59 UTC 2025 - Last Modified: Mon Nov 24 03:40:05 UTC 2025 - 26.7K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/app/web/admin/duplicatehost/AdminDuplicatehostAction.java
/** * Displays the initial duplicate host management page. * * @param form the search form * @return HTML response for the duplicate host list page */ @Execute @Secured({ ROLE, ROLE + VIEW }) public HtmlResponse index(final SearchForm form) { return asListHtml(); } /** * Displays the duplicate host list with pagination. *
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Nov 20 13:56:35 UTC 2025 - 15.6K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/DuplicateHostHelper.java
/** List of duplicate host rules for URL conversion */ protected List<DuplicateHost> duplicateHostList; /** * Default constructor for DuplicateHostHelper. * Creates a new duplicate host helper instance. */ public DuplicateHostHelper() { // Default constructor } /** * Initializes the duplicate host helper after construction.
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Nov 28 16:29:12 UTC 2025 - 4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/util/TextUtil.java
* <li>Optionally removing duplicate terms based on a flag.</li> * <li>Limiting the maximum size of alphanumeric and symbol terms.</li> * </ul> * * <p>The {@link TextNormalizeContext} class provides a fluent API to configure the text * normalization process, including setting initial buffer capacity, maximum term sizes, * duplicate term removal, and custom space characters. * * <p>Example usage:
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Nov 22 13:28:22 UTC 2025 - 12K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/DocumentHelper.java
return fessConfig.getCrawlerDocumentMaxSymbolTermSizeAsInteger(); } /** * Checks if duplicate term removal is enabled in configuration. * * @return true if duplicate terms should be removed, false otherwise */ protected boolean isDuplicateTermRemoved() { final FessConfig fessConfig = ComponentUtil.getFessConfig();
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Nov 28 16:29:12 UTC 2025 - 17.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
*/ public void setInitialBufferSize(final int initialBufferSize) { this.initialBufferSize = initialBufferSize; } /** * Sets whether duplicated terms are replaced. * @param replaceDuplication If true, duplicated terms are replaced. */ public void setReplaceDuplication(final boolean replaceDuplication) { this.replaceDuplication = replaceDuplication; } /**
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 30.8K bytes - Viewed (0) -
src/main/resources/fess_config.properties
# Maximum size of alphanumeric terms in documents. crawler.document.max.alphanum.term.size=20 # Maximum size of symbol terms in documents. crawler.document.max.symbol.term.size=10 # Whether to remove duplicate terms in documents. crawler.document.duplicate.term.removed=false # Unicode space characters for document parsing.
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Dec 11 09:47:03 UTC 2025 - 54.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/DataServiceImpl.java
if (arMap.containsKey(accessResult.getUrl())) { throw new CrawlerSystemException( "AccessResult for URL '" + accessResult.getUrl() + "' already exists. Duplicate URLs are not allowed."); } arMap.put(accessResult.getUrl(), accessResult); } } /** * Gets the count of access results for the specified session. *
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 6.9K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java
new ContentNotFoundException(urlQueue.getParentUrl(), url)); } } /** * Stores a child URL in the crawling queue with duplicate host handling. * This method applies duplicate host conversion before storing the URL. * * @param childUrl the child URL to store * @param parentUrl the parent URL that referenced this child URL
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Dec 11 09:47:03 UTC 2025 - 19.5K bytes - Viewed (0)