//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// UNSUPPORTED: no-filesystem
// UNSUPPORTED: libcpp-has-no-unicode
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=9000000

// <print>

// Tests the UTF-8 to UTF-16/32 encoding.
// UTF-16 is used on Windows to write to the Unicode API.
// UTF-32 is used to test the Windows behaviour on Linux using 32-bit wchar_t.

#include <algorithm>
#include <array>
#include <cassert>
#include <print>
#include <string_view>

#include "test_macros.h"
#include "make_string.h"

#define SV(S) MAKE_STRING_VIEW(CharT, S)

// Transcodes the UTF-8 `input` into a fixed-size buffer of CharT and verifies
// that
// - the code units written are exactly `expected`, and
// - no element behind the returned output iterator has been modified.
template <class CharT>
constexpr void test(std::basic_string_view<CharT> expected, std::string_view input) {
  std::array<CharT, 1024> buffer;
  // The expected output must leave at least one sentinel in the buffer.
  assert(expected.size() < buffer.size());
  // Pre-fill with a sentinel so stray writes past the output can be detected.
  std::ranges::fill(buffer, CharT('*'));

  auto out = std::__unicode::__transcode(input.begin(), input.end(), buffer.begin());

  assert(std::basic_string_view<CharT>(buffer.begin(), out) == expected);

  // Everything after the written range must still hold the sentinel value.
  out = std::find_if(out, buffer.end(), [](CharT c) { return c != CharT('*'); });
  assert(out == buffer.end());
}

// Runs all valid and invalid UTF-8 inputs for one output character type.
template <class CharT>
constexpr void test() {
  // *** Test valid UTF-8 ***
  // TEST(S) uses the same literal as the UTF-8 input and, converted to CharT
  // via SV, as the expected output.
#define TEST(S) test(SV(S), S)
  TEST("hello world");
  // copied from benchmarks/std_format_spec_string_unicode.bench.cpp
  TEST("Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. Iuvaret fabulas qui ex.");
  TEST("Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝusrefērrēnÞur no mel.");
  TEST("Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем цонцептам диспутандо");
  TEST("入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。");
  TEST("\U0001f636\u200d\U0001f32b\ufe0f");
#undef TEST

  // *** Test invalid UTF-8 ***
  test(SV("\ufffd"), "\xc3");
  test(SV("\ufffd("), "\xc3\x28");

  // Surrogate range
  test(SV("\ufffd"), "\xed\xa0\x80"); // U+D800
  test(SV("\ufffd"), "\xed\xaf\xbf"); // U+DBFF
  test(SV("\ufffd"), "\xed\xb0\x80"); // U+DC00
  test(SV("\ufffd"), "\xed\xbf\x80"); // U+DFC0
  test(SV("\ufffd"), "\xed\xbf\xbf"); // U+DFFF

  // Beyond valid values
  test(SV("\ufffd"), "\xf4\x90\x80\x80"); // U+110000
  test(SV("\ufffd"), "\xf4\xbf\xbf\xbf"); // U+13FFFF

  // Validates http://unicode.org/review/pr-121.html option 3.
  test(SV("\u0061\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0062"), "\x61\xF1\x80\x80\xE1\x80\xC2\x62");
}

// Runs the tests for every output character type; returns true so the call
// can be used as the operand of a static_assert.
constexpr bool test() {
  test<char16_t>();
  test<char32_t>();
#if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
  test<wchar_t>();
#endif
  return true;
}

// Executes the tests both at run time and during constant evaluation.
int main(int, char**) {
  test();
  static_assert(test());

  return 0;
}