xref: /llvm-project/libcxx/test/libcxx/input.output/iostream.format/print.fun/transcoding.pass.cpp (revision 427a5cf105c409993c812f4fb1868bac96fce0c5)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
10 // UNSUPPORTED: no-filesystem
11 // UNSUPPORTED: libcpp-has-no-unicode
12 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
13 // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=9000000
14 
15 // <print>
16 
17 // Tests the UTF-8 to UTF-16/32 encoding.
18 // UTF-16 is used on Windows to write to the Unicode API.
19 // UTF-32 is used to test the Windows behaviour on Linux using 32-bit wchar_t.
20 
21 #include <algorithm>
22 #include <array>
23 #include <cassert>
24 #include <print>
25 #include <string_view>
26 
27 #include "test_macros.h"
28 #include "make_string.h"
29 
30 #define SV(S) MAKE_STRING_VIEW(CharT, S)
31 
// Transcodes the UTF-8 `input` into a sentinel-filled buffer of CharT and
// verifies two things:
//  - the code units written by the transcoder compare equal to `expected`;
//  - no element at or after the returned output iterator was modified.
template <class CharT>
constexpr void test(std::basic_string_view<CharT> expected, std::string_view input) {
  // Every slot starts out as '*', so an overrun past the reported end shows up
  // as a slot that no longer holds the sentinel.
  std::array<CharT, 1024> storage;
  storage.fill(CharT('*'));

  // The expected output has to fit in the buffer with room to spare.
  assert(expected.size() < storage.size());

  const auto written_end = std::__unicode::__transcode(input.begin(), input.end(), storage.begin());

  const std::basic_string_view<CharT> actual(storage.begin(), written_end);
  assert(actual == expected);

  // The tail of the buffer must still consist solely of sentinels.
  assert(std::all_of(written_end, storage.end(), [](CharT unit) { return unit == CharT('*'); }));
}
45 
// Runs the transcoding test cases for one output character type.
//
// Well-formed UTF-8 is expected to come out as the same text encoded in CharT.
// Ill-formed UTF-8 is expected to come out as U+FFFD REPLACEMENT CHARACTER(s).
template <class CharT>
constexpr void test() {
  // *** Test valid UTF-8 ***
  // TEST passes the same literal twice: once through SV (which expands to
  // MAKE_STRING_VIEW(CharT, S)) as the expected output, and once as the
  // narrow input string.
#define TEST(S) test(SV(S), S)
  TEST("hello world");
  // copied from benchmarks/std_format_spec_string_unicode.bench.cpp
  TEST("Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. Iuvaret fabulas qui ex.");
  TEST("Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝusrefērrēnÞur no mel.");
  TEST("Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем цонцептам диспутандо");
  TEST("入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。");
  TEST("\U0001f636\u200d\U0001f32b\ufe0f");
#undef TEST

  // *** Test invalid UTF-8 ***
  test(SV("\ufffd"), "\xc3");      // 2-byte lead byte with no continuation byte
  test(SV("\ufffd("), "\xc3\x28"); // 2-byte lead byte followed by '(' (0x28)

  // Surrogate range: the first and last code point of the high (U+D800-U+DBFF)
  // and low (U+DC00-U+DFFF) surrogate blocks, encoded as 3-byte sequences.
  test(SV("\ufffd"), "\xed\xa0\x80"); // U+D800
  test(SV("\ufffd"), "\xed\xaf\xbf"); // U+DBFF
  test(SV("\ufffd"), "\xed\xb0\x80"); // U+DC00
  test(SV("\ufffd"), "\xed\xbf\xbf"); // U+DFFF

  // Beyond valid values
  test(SV("\ufffd"), "\xf4\x90\x80\x80"); // U+110000, first value past U+10FFFF
  test(SV("\ufffd"), "\xf4\xbf\xbf\xbf"); // U+13FFFF, largest value encodable with lead byte 0xF4

  // Validates http://unicode.org/review/pr-121.html option 3.
  // Each of the six ill-formed code units between 'a' and 'b' is expected to
  // yield its own U+FFFD.
  test(SV("\u0061\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0062"), "\x61\xF1\x80\x80\xE1\x80\xC2\x62");
}
76 
77 constexpr bool test() {
78   test<char16_t>();
79   test<char32_t>();
80 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
81   test<wchar_t>();
82 #endif
83   return true;
84 }
85 
86 int main(int, char**) {
87   test();
88   static_assert(test());
89 
90   return 0;
91 }
92