Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 17 for duplicates (0.15 sec)

  1. fess-crawler-opensearch/src/test/java/org/codelibs/fess/crawler/service/impl/OpenSearchUrlQueueServiceTest.java

            urlQueue2.setMethod("GET");
            urlQueue2.setSessionId(sessionId);
            urlQueue2.setUrl("http://www.example.com/page1"); // Duplicate
            urlQueueList.add(urlQueue2);
    
            urlQueueService.offerAll(sessionId, urlQueueList);
    
            // Should only store one item (duplicates are filtered)
            int count = 0;
            while (true) {
                final OpenSearchUrlQueue polled = urlQueueService.poll(sessionId);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 20 08:40:57 UTC 2025
    - 14.3K bytes
    - Viewed (0)
  2. src/test/java/org/codelibs/fess/suggest/util/SuggestUtilTest.java

            assertNotNull(invalidKeywords);
            assertEquals(0, invalidKeywords.size());
        }
    
        @Test
        public void testGetKeywordsWithDuplicates() {
            // Test that duplicates are removed
            String query = "test test query";
            String[] fields = { "content" };
            List<String> keywords = SuggestUtil.getKeywords(query, fields);
            assertNotNull(keywords);
    Registered: Sat Dec 20 13:04:59 UTC 2025
    - Last Modified: Mon Nov 24 03:40:05 UTC 2025
    - 26.7K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/app/web/admin/duplicatehost/AdminDuplicatehostAction.java

        /**
         * Displays the initial duplicate host management page.
         *
         * @param form the search form
         * @return HTML response for the duplicate host list page
         */
        @Execute
        @Secured({ ROLE, ROLE + VIEW })
        public HtmlResponse index(final SearchForm form) {
            return asListHtml();
        }
    
        /**
         * Displays the duplicate host list with pagination.
         *
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Nov 20 13:56:35 UTC 2025
    - 15.6K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/helper/DuplicateHostHelper.java

        /** List of duplicate host rules for URL conversion */
        protected List<DuplicateHost> duplicateHostList;
    
        /**
         * Default constructor for DuplicateHostHelper.
         * Creates a new duplicate host helper instance.
         */
        public DuplicateHostHelper() {
            // Default constructor
        }
    
        /**
         * Initializes the duplicate host helper after construction.
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 4K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/TextUtil.java

     *   <li>Optionally removing duplicate terms based on a flag.</li>
     *   <li>Limiting the maximum size of alphanumeric and symbol terms.</li>
     * </ul>
     *
     * <p>The {@link TextNormalizeContext} class provides a fluent API to configure the text
     * normalization process, including setting initial buffer capacity, maximum term sizes,
     * duplicate term removal, and custom space characters.
     *
     * <p>Example usage:
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Nov 22 13:28:22 UTC 2025
    - 12K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/helper/DocumentHelper.java

            return fessConfig.getCrawlerDocumentMaxSymbolTermSizeAsInteger();
        }
    
        /**
         * Checks if duplicate term removal is enabled in configuration.
         *
         * @return true if duplicate terms should be removed, false otherwise
         */
        protected boolean isDuplicateTermRemoved() {
            final FessConfig fessConfig = ComponentUtil.getFessConfig();
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 17.4K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

         */
        public void setInitialBufferSize(final int initialBufferSize) {
            this.initialBufferSize = initialBufferSize;
        }
    
        /**
         * Sets whether duplicated terms are replaced.
         * @param replaceDuplication If true, duplicated terms are replaced.
         */
        public void setReplaceDuplication(final boolean replaceDuplication) {
            this.replaceDuplication = replaceDuplication;
        }
    
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 30.8K bytes
    - Viewed (0)
  8. src/main/resources/fess_config.properties

    # Maximum size of alphanumeric terms in documents.
    crawler.document.max.alphanum.term.size=20
    # Maximum size of symbol terms in documents.
    crawler.document.max.symbol.term.size=10
    # Whether to remove duplicate terms in documents.
    crawler.document.duplicate.term.removed=false
    # Unicode space characters for document parsing.
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 54.8K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/DataServiceImpl.java

                if (arMap.containsKey(accessResult.getUrl())) {
                    throw new CrawlerSystemException(
                            "AccessResult for URL '" + accessResult.getUrl() + "' already exists. Duplicate URLs are not allowed.");
                }
                arMap.put(accessResult.getUrl(), accessResult);
            }
    
        }
    
        /**
         * Gets the count of access results for the specified session.
         *
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 6.9K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/crawler/FessCrawlerThread.java

                        new ContentNotFoundException(urlQueue.getParentUrl(), url));
            }
        }
    
        /**
         * Stores a child URL in the crawling queue with duplicate host handling.
         * This method applies duplicate host conversion before storing the URL.
         *
         * @param childUrl the child URL to store
         * @param parentUrl the parent URL that referenced this child URL
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 19.5K bytes
    - Viewed (0)
Back to top