Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 30 for unicode (3.04 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java

            assertTrue("Should contain line 9999", result.getContent().contains("Line 9999"));
        }
    
        /**
         * Test extraction with various Unicode characters.
         */
        public void test_getText_unicodeContent_extractsCorrectly() {
            final String unicodeContent = "Hello 世界 مرحبا мир שלום";
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/util/CharUtilTest.java

            assertFalse(CharUtil.isUrlChar('\u00A0')); // non-breaking space
            assertFalse(CharUtil.isUrlChar('\u00FF')); // ÿ
        }
    
        public void test_isUrlChar_unicode() {
            // Test Unicode characters (not valid URL chars without encoding)
            assertFalse(CharUtil.isUrlChar('\u3042')); // あ (Hiragana)
            assertFalse(CharUtil.isUrlChar('\u4E00')); // 一 (CJK)
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 5.6K bytes
    - Viewed (0)
  3. CLAUDE.md

    **Technology Stack:**
    - Java 21+, Maven
    - OpenSearch (provided dependency)
    - Apache Lucene (query parsing, text analysis)
    - ICU4J (Unicode text processing)
    - JUnit 4 (testing)
    
    **Repository:** https://github.com/codelibs/fess-suggest
    
    ---
    
    ## Architecture
    
    ### Package Structure
    
    ```
    org.codelibs.fess.suggest/
    Registered: Sat Dec 20 13:04:59 UTC 2025
    - Last Modified: Mon Nov 24 03:40:05 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  4. src/test/java/org/codelibs/curl/CurlRequestTest.java

            // Body with unicode characters
            String unicodeBody = "{\"message\":\"こんにちは世界\"}";
            request.body(unicodeBody);
    
            assertEquals(unicodeBody, request.body());
        }
    
        @Test
        public void testParamWithUnicodeCharacters() {
            CurlRequest request = new CurlRequest(Method.GET, "https://example.com");
    
            // Param with unicode characters
    Registered: Sat Dec 20 09:13:53 UTC 2025
    - Last Modified: Mon Nov 24 03:10:07 UTC 2025
    - 20.5K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            try {
                robotsTxt = robotsTxtHelper.parse(in, "UTF-8");
            } finally {
                CloseableUtil.closeQuietly(in);
            }
    
            // Should handle unicode content
            assertNotNull(robotsTxt);
            assertFalse(robotsTxt.allows("/test/", "TestBot"));
        }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  6. src/test/java/org/codelibs/fess/suggest/util/SuggestUtilTest.java

            assertEquals(expected, id); // Should not be truncated
        }
    
        @Test
        public void testCreateSuggestTextIdWithUnicodeCharacters() {
            // Test with various Unicode characters that may encode differently
            String unicodeText = "日本語テスト🎌🗾こんにちは世界";
            String id = SuggestUtil.createSuggestTextId(unicodeText);
            assertNotNull(id);
            assertTrue(id.length() <= 445);
        }
    
    Registered: Sat Dec 20 13:04:59 UTC 2025
    - Last Modified: Mon Nov 24 03:40:05 UTC 2025
    - 26.7K bytes
    - Viewed (0)
  7. src/main/resources/fess_config.properties

    # Whether to remove duplicate terms in documents.
    crawler.document.duplicate.term.removed=false
    # Unicode space characters for document parsing.
    crawler.document.space.chars=u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6
    # Unicode full stop characters for document parsing.
    crawler.document.fullstop.chars=u002eu06d4u2e3cu3002
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Dec 11 09:47:03 UTC 2025
    - 54.8K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

         * The value is, e.g. u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6 <br>
         * comment: Unicode space characters for document parsing.
         * @return The value of found property. (NotNull: if not found, exception but basically no way)
         */
        String getCrawlerDocumentSpaceChars();
    
        /**
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 525.7K bytes
    - Viewed (2)
  9. src/test/java/org/codelibs/core/misc/Base64UtilTest.java

            }
        }
    
        /**
         * Test encode with empty byte array
         *
         * @throws Exception
         */
        public void testEncode_EmptyArray() throws Exception {
            final String result = Base64Util.encode(new byte[0]);
            assertEquals("Empty array should return empty string", "", result);
        }
    
        /**
         * Test encode with null returns empty string
         *
         * @throws Exception
    Registered: Sat Dec 20 08:55:33 UTC 2025
    - Last Modified: Sat Nov 22 11:21:59 UTC 2025
    - 6K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/core/misc/Base64Util.java

         * <p>
         * This method uses {@link java.util.Base64.Encoder} for encoding.
         * </p>
         *
         * @param inData
         *            The data to encode
         * @return The encoded data, or an empty string if the input is null or empty
         */
        public static String encode(final byte[] inData) {
            if (ArrayUtil.isEmpty(inData)) {
                return "";
            }
            return Base64.getEncoder().encodeToString(inData);
    Registered: Sat Dec 20 08:55:33 UTC 2025
    - Last Modified: Sat Nov 22 11:21:59 UTC 2025
    - 2.1K bytes
    - Viewed (0)
Back to top