Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 8 of 8 for hocharacter (0.06 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/CharUtil.java

     */
    package org.codelibs.fess.crawler.util;
    
    /**
     * Utility class for character-related operations.
     */
    public final class CharUtil {
        /**
         * Private constructor to prevent instantiation of this utility class.
         */
        private CharUtil() {
        }
    
        /**
         * Checks if the given character is a valid URL character.
         *
         * Valid URL characters include:
         * - Lowercase letters (a-z)
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.3K bytes
    - Viewed (1)
  2. src/main/java/org/codelibs/fess/suggest/normalizer/HankakuKanaToZenkakuKana.java

            }
            return c;
        }
    
        /**
         * Merges two characters, handling voiced and semi-voiced sound marks.
         * @param c1 The first character.
         * @param c2 The second character.
         * @return The merged character.
         */
        public static char mergeChar(final char c1, final char c2) {
            if (c2 == '゙') {
                if ("カキクケコサシスセソタチツテトハヒフヘホ".indexOf(c1) >= 0) {
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Fri Jul 04 14:00:23 UTC 2025
    - 6.8K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/XmlUtil.java

         * according to the XML 1.0 specification. Valid characters include:
         * - Tab (0x9)
         * - Line feed (0xA)
         * - Carriage return (0xD)
         * - Any character between 0x20 and 0xD7FF
         * - Any character between 0xE000 and 0xFFFD
         * - Any character between 0x10000 and 0x10FFFF
         *
         * @param in the input string to be processed
         * @return a new string with invalid XML characters removed, or the original
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.4K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

            return parse(stream, Constants.UTF_8);
        }
    
        /**
         * Parses a robots.txt file from the given input stream using the specified character encoding.
         * @param stream the input stream to parse
         * @param charsetName the character encoding to use
         * @return the parsed RobotsTxt object, or null if disabled
         */
        public RobotsTxt parse(final InputStream stream, final String charsetName) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/EncodingHelper.java

    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;
    
    import org.codelibs.core.lang.StringUtil;
    
    /**
     * EncodingHelper provides utility methods for managing and normalizing character encodings.
     * It allows setting a default encoding, mapping various encoding names to preferred ones,
     * and normalizing an encoding string to its preferred form or the default if no mapping is found.
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.2K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/Constants.java

    import java.nio.charset.StandardCharsets;
    
    /**
     * Constants used in the fess-crawler.
     * This class provides a collection of constant values for HTTP methods, status codes,
     * transformer names, boolean values, character encodings, date/time formats, and XML features.
     * It is not meant to be instantiated.
     */
    public final class Constants {
        /**
         * The GET method.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.6K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

                new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE), new NumericEntityUnescaper());
    
        /**
         * Default character encoding for content extraction.
         */
        protected String encoding = Constants.UTF_8;
    
        /**
         * The preload size for charset detection.
         */
        protected int preloadSizeForCharset = 2048;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java

     * It also provides a method to retrieve the extracted data as a String from an AccessResultData object.
     *
     * <p>
     * The class handles character encoding issues by attempting to use the specified charset.
     * If the specified charset is invalid, it falls back to UTF-8.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
Back to top