Unicode - Code Search

fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_malformed.txt

Disallow: /test/

# Case 9: Special characters in paths
User-agent: SpecialCharBot
Disallow: /path with spaces/
Disallow: /path%20encoded/
Disallow: /path?query=value
Disallow: /path#fragment
Allow: /unicode/日本語/

# Case 10: Multiple User-agents in sequence
User-agent: Bot1
User-agent: Bot2
User-agent: Bot3
Disallow: /shared/

# Case 11: Sitemap with various formats
Sitemap: http://example.com/sitemap.xml

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Fri Nov 14 12:52:01 UTC 2025

- 2.6K bytes

- Viewed (0)

github.com/tiangolo/fastapi

scripts/translate.py

    Unicode U+201C (LEFT DOUBLE QUOTATION MARK) and Unicode U+201D (RIGHT DOUBLE QUOTATION MARK)

"English single typographic quotes"

    The characters «‘» and «’»
    Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and Unicode U+2019 (RIGHT SINGLE QUOTATION MARK)

"code snippet"

Registered: Sun Dec 28 07:19:09 UTC 2025

- Last Modified: Sat Dec 27 19:05:53 UTC 2025

- 34.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java

        assertTrue("Should contain line 9999", result.getContent().contains("Line 9999"));
    }

    /**
     * Test extraction with various Unicode characters.
     */
    public void test_getText_unicodeContent_extractsCorrectly() {
        final String unicodeContent = "Hello 世界 مرحبا мир שלום";

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Mon Nov 24 03:59:47 UTC 2025

- 8.9K bytes

- Viewed (0)

github.com/golang/go

.gitignore

/src/cmd/dist/dist
/src/cmd/go/internal/cfg/zdefaultcc.go
/src/cmd/internal/objabi/zbootstrap.go
/src/go/build/zcgo.go
/src/go/doc/headscan
/src/internal/buildcfg/zbootstrap.go
/src/internal/runtime/sys/zversion.go
/src/unicode/maketables
/src/time/tzdata/zzipdata.go
/test.out
/test/garbage/*.out
/test/pass.out
/test/run.out
/test/times.out

# This file includes artifacts of Go build that should not be checked in.

Registered: Tue Dec 30 11:13:12 UTC 2025

- Last Modified: Mon Nov 10 20:41:03 UTC 2025

- 975 bytes

- Viewed (0)

github.com/golang/go

src/bytes/bytes_test.go

		benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Latin, 'é'))
	})
	b.Run("Cyrillic", func(b *testing.B) {
		// Cyrillic is mostly 2 and 3 byte runes.
		benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Cyrillic, 'Ꙁ'))
	})
	b.Run("Han", func(b *testing.B) {
		// Han consists only of 3 and 4 byte runes.
		benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Han, '𠀿'))
	})
}

Registered: Tue Dec 30 11:13:12 UTC 2025

- Last Modified: Tue Dec 23 23:54:14 UTC 2025

- 62.9K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/EXTRACTOR_TESTS_README.md

- Improved error messages with encoding
- Various encoding support
- Edge cases (empty, large, Unicode)
- Special characters handling

**Test Count**: 12 tests

**Key Scenarios**:
- ✅ Default UTF-8 encoding extraction
- ✅ Custom encoding support
- ✅ Error messages include encoding information
- ✅ Large content handling
- ✅ Unicode and special characters
- ✅ Empty and whitespace-only content

---

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Wed Nov 19 08:55:01 UTC 2025

- 5.7K bytes

- Viewed (0)

github.com/tiangolo/fastapi

docs/de/llm-prompt.md

### Target language

Translate to German (Deutsch).

Language code: de.


### Definitions

"hyphen"
    The character «-»
    Unicode U+002D (HYPHEN-MINUS)
    Alternative names: hyphen, dash, minus sign

"dash"
    The character «–»
    Unicode U+2013 (EN DASH)
    German name: Halbgeviertstrich


### Grammar to use when talking to the reader

Use the formal grammar (use «Sie» instead of «Du»).

Registered: Sun Dec 28 07:19:09 UTC 2025

- Last Modified: Fri Dec 26 09:39:53 UTC 2025

- 11.9K bytes

- Viewed (0)

github.com/golang/go

src/bytes/buffer.go

// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package bytes

// Simple byte buffer for marshaling data.

import (
	"errors"
	"io"
	"unicode/utf8"
)

// smallBufferSize is an initial allocation minimal capacity.
const smallBufferSize = 64

// A Buffer is a variable-sized buffer of bytes with [Buffer.Read] and [Buffer.Write] methods.

Registered: Tue Dec 30 11:13:12 UTC 2025

- Last Modified: Fri Nov 14 19:01:17 UTC 2025

- 16.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/util/CharUtilTest.java

        assertFalse(CharUtil.isUrlChar('\u00A0')); // non-breaking space
        assertFalse(CharUtil.isUrlChar('\u00FF')); // ÿ
    }

    public void test_isUrlChar_unicode() {
        // Test Unicode characters (not valid URL chars without encoding)
        assertFalse(CharUtil.isUrlChar('\u3042')); // あ (Hiragana)
        assertFalse(CharUtil.isUrlChar('\u4E00')); // 一 (CJK)

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Mon Nov 24 03:59:47 UTC 2025

- 5.6K bytes

- Viewed (0)

github.com/codelibs/curl4j

src/test/java/org/codelibs/curl/CurlRequestTest.java

        // Body with unicode characters
        String unicodeBody = "{\"message\":\"こんにちは世界\"}";
        request.body(unicodeBody);

        assertEquals(unicodeBody, request.body());
    }

    @Test
    public void testParamWithUnicodeCharacters() {
        CurlRequest request = new CurlRequest(Method.GET, "https://example.com");

        // Param with unicode characters

Registered: Sat Dec 20 09:13:53 UTC 2025

- Last Modified: Mon Nov 24 03:10:07 UTC 2025

- 20.5K bytes

- Viewed (0)

Search Options