xref: /llvm-project/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp (revision 09e3a360581dc36d0820d3fb6da9bd7cfed87b5d)
1a4800735SMark de Wever //===----------------------------------------------------------------------===//
26a54dfbfSLouis Dionne //
3a4800735SMark de Wever // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a4800735SMark de Wever // See https://llvm.org/LICENSE.txt for license information.
5a4800735SMark de Wever // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a4800735SMark de Wever //
7a4800735SMark de Wever //===----------------------------------------------------------------------===//
8a4800735SMark de Wever 
9a4800735SMark de Wever // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
10520c7fbbSLouis Dionne // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
11a4800735SMark de Wever 
12a4800735SMark de Wever // This version runs the test when the platform has Unicode support.
13a4800735SMark de Wever // UNSUPPORTED: libcpp-has-no-unicode
14a4800735SMark de Wever 
153d334df5SLouis Dionne // TODO FMT This test should not require std::to_chars(floating-point)
16f0fc8c48SLouis Dionne // XFAIL: availability-fp_to_chars-missing
173d334df5SLouis Dionne 
18a4800735SMark de Wever // <format>
19a4800735SMark de Wever 
// This tests the debug string type for the formatter specializations for char
// and string types. This version tests Unicode strings.
22a4800735SMark de Wever 
#include <format>

#include <algorithm>
#include <cassert>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <list>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
31a4800735SMark de Wever 
32a4800735SMark de Wever #include "test_macros.h"
33a4800735SMark de Wever #include "make_string.h"
34a4800735SMark de Wever #include "test_format_string.h"
35f8bed136SMark de Wever #include "assert_macros.h"
363476b56fSMark de Wever #include "concat_macros.h"
37a4800735SMark de Wever 
38a4800735SMark de Wever #ifndef TEST_HAS_NO_LOCALIZATION
39a4800735SMark de Wever #  include <iostream>
40a4800735SMark de Wever #endif
41a4800735SMark de Wever 
42a4800735SMark de Wever #define SV(S) MAKE_STRING_VIEW(CharT, S)
43a4800735SMark de Wever 
// Checks that std::format produces exactly `expected` for the given format
// string and arguments.
//
// - expected: the full text the formatting call must return.
// - fmt:      the compile-time checked format string for Args....
// - args:     the values to format.
//
// The plain overload is always exercised; the std::locale overload is only
// exercised when the test suite is built with localization support. Both
// checks report the format string, the expected text and the actual text on
// failure so that a mismatch can be diagnosed from the log alone.
//
// NOTE(review): `args` is forwarded once per formatting call. This relies on
// the formatting functions only reading their arguments - confirm if a
// move-only or move-sensitive argument type is ever added.
auto test_format = []<class CharT, class... Args>(
                       std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
  {
    std::basic_string<CharT> out = std::format(fmt, std::forward<Args>(args)...);
    TEST_REQUIRE(out == expected,
                 TEST_WRITE_CONCATENATED(
                     "\nFormat string   ", fmt.get(), "\nExpected output ", expected, "\nActual output   ", out, '\n'));
  }
#ifndef TEST_HAS_NO_LOCALIZATION
  {
    // Same check through the overload taking an explicit (default constructed) locale.
    std::basic_string<CharT> out = std::format(std::locale(), fmt, std::forward<Args>(args)...);
    TEST_REQUIRE(out == expected,
                 TEST_WRITE_CONCATENATED(
                     "\nFormat string   ", fmt.get(), "\nExpected output ", expected, "\nActual output   ", out, '\n'));
  }
#endif // TEST_HAS_NO_LOCALIZATION
};
59a4800735SMark de Wever 
// Checks std::format_to against `expected` using several kinds of output
// iterators:
//   - a std::basic_string iterator into a buffer pre-sized to expected.size(),
//     with and (when localization is available) without an explicit locale;
//   - std::back_inserter into a std::list and into a std::vector;
//   - a raw pointer into a fixed-size array.
//
// - expected: the full text the formatting call must write.
// - fmt:      the compile-time checked format string for Args....
// - args:     the values to format; forwarded once per formatting call.
auto test_format_to =
    []<class CharT, class... Args>(
        std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
      {
        // The buffer has exactly the expected size, so the returned iterator
        // must be the end of the buffer.
        std::basic_string<CharT> out(expected.size(), CharT(' '));
        auto it = std::format_to(out.begin(), fmt, std::forward<Args>(args)...);
        assert(it == out.end());
        assert(out == expected);
      }
#ifndef TEST_HAS_NO_LOCALIZATION
      {
        std::basic_string<CharT> out(expected.size(), CharT(' '));
        auto it = std::format_to(out.begin(), std::locale(), fmt, std::forward<Args>(args)...);
        assert(it == out.end());
        assert(out == expected);
      }
#endif // TEST_HAS_NO_LOCALIZATION
      {
        std::list<CharT> out;
        std::format_to(std::back_inserter(out), fmt, std::forward<Args>(args)...);
        assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
      }
      {
        std::vector<CharT> out;
        std::format_to(std::back_inserter(out), fmt, std::forward<Args>(args)...);
        assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
      }
      {
        assert(expected.size() < 4096 && "Update the size of the buffer.");
        CharT out[4096];
        CharT* it = std::format_to(out, fmt, std::forward<Args>(args)...);
        // Compare as std::ptrdiff_t, the type std::distance yields for pointers,
        // instead of narrowing the size to int.
        assert(std::distance(out, it) == static_cast<std::ptrdiff_t>(expected.size()));
        // Build a string from the [out, it) range so the comparison covers
        // exactly the written characters, including any embedded '\0'.
        assert(std::basic_string<CharT>(out, it) == expected);
      }
    };
96a4800735SMark de Wever 
// Checks that std::formatted_size reports expected.size() for the given
// format string and arguments, both without and (when localization is
// available) with an explicit locale. Only the size of `expected` is used;
// its contents are not compared here.
//
// - expected: text whose length the formatting call must report.
// - fmt:      the compile-time checked format string for Args....
// - args:     the values to format; forwarded once per formatting call.
auto test_formatted_size =
    []<class CharT, class... Args>(
        std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
      {
        std::size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
        assert(size == expected.size());
      }
#ifndef TEST_HAS_NO_LOCALIZATION
      {
        std::size_t size = std::formatted_size(std::locale(), fmt, std::forward<Args>(args)...);
        assert(size == expected.size());
      }
#endif // TEST_HAS_NO_LOCALIZATION
    };
111a4800735SMark de Wever 
// Checks std::format_to_n against `expected` for three output budgets:
//   - n == expected.size(): everything fits (with and, when localization is
//     available, without an explicit locale);
//   - n == 0: nothing may be written;
//   - n == expected.size() / 2: the output is truncated.
// In every case result.size must be the size of the untruncated output and
// result.out must be the end of the written range.
//
// - expected: the full, untruncated text of the formatting call.
// - fmt:      the compile-time checked format string for Args....
// - args:     the values to format; forwarded once per formatting call.
auto test_format_to_n =
    []<class CharT, class... Args>(
        std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
      {
        std::size_t n = expected.size();
        std::basic_string<CharT> out(n, CharT(' '));
        std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward<Args>(args)...);
        assert(result.size == static_cast<std::ptrdiff_t>(expected.size()));
        assert(result.out == out.end());
        assert(out == expected);
      }
#ifndef TEST_HAS_NO_LOCALIZATION
      {
        std::size_t n = expected.size();
        std::basic_string<CharT> out(n, CharT(' '));
        std::format_to_n_result result =
            std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward<Args>(args)...);
        assert(result.size == static_cast<std::ptrdiff_t>(expected.size()));
        assert(result.out == out.end());
        assert(out == expected);
      }
#endif // TEST_HAS_NO_LOCALIZATION
      {
        // Zero budget: the string stays empty, but the full size is still reported.
        std::ptrdiff_t n = 0;
        std::basic_string<CharT> out;
        std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward<Args>(args)...);
        assert(result.size == static_cast<std::ptrdiff_t>(expected.size()));
        assert(result.out == out.end());
        assert(out.empty());
      }
      {
        // Half budget: only the first n characters of the expected text are written.
        std::ptrdiff_t n = static_cast<std::ptrdiff_t>(expected.size() / 2);
        std::basic_string<CharT> out(n, CharT(' '));
        std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward<Args>(args)...);
        assert(result.size == static_cast<std::ptrdiff_t>(expected.size()));
        assert(result.out == out.end());
        assert(out == expected.substr(0, n));
      }
    };
151a4800735SMark de Wever 
// Tests the debug ("{:?}") formatting of single characters of type CharT.
//
// The first part uses characters that are valid for every CharT. The parts
// guarded by TEST_HAS_NO_WIDE_CHARACTERS / TEST_SHORT_WCHAR pass wchar_t
// literals and are only instantiated when sizeof(CharT) is larger than 1
// (respectively larger than 2), i.e. for code points that do not fit in a
// single char (respectively in 16 bits).
template <class CharT>
void test_char() {
  // *** P2286 examples ***
  test_format(SV("['\\'', '\"']"), SV("[{:?}, {:?}]"), CharT('\''), CharT('"'));

  // *** Special cases ***
  test_format(SV("'\\t'"), SV("{:?}"), CharT('\t'));
  test_format(SV("'\\n'"), SV("{:?}"), CharT('\n'));
  test_format(SV("'\\r'"), SV("{:?}"), CharT('\r'));
  test_format(SV("'\\\\'"), SV("{:?}"), CharT('\\'));

  test_format(SV("'\\\''"), SV("{:?}"), CharT('\''));
  test_format(SV("'\"'"), SV("{:?}"), CharT('"')); // only special for string

  test_format(SV("' '"), SV("{:?}"), CharT(' '));

  // *** Printable ***
  test_format(SV("'a'"), SV("{:?}"), CharT('a'));
  test_format(SV("'b'"), SV("{:?}"), CharT('b'));
  test_format(SV("'c'"), SV("{:?}"), CharT('c'));

  // *** Non-printable ***

  // Control
  test_format(SV("'\\u{0}'"), SV("{:?}"), CharT('\0'));
  test_format(SV("'\\u{1f}'"), SV("{:?}"), CharT('\x1f'));

  // Ill-formed
  if constexpr (sizeof(CharT) == 1)
    test_format(SV("'\\x{80}'"), SV("{:?}"), CharT('\x80'));

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  if constexpr (sizeof(CharT) > 1) {
    using V = std::basic_string_view<CharT>;

    // Unicode fitting in a 16-bit wchar_t

    // *** Non-printable ***

    // Space_Separator
    test_format(V{L"'\\u{a0}'"}, L"{:?}", L'\xa0');     // NO-BREAK SPACE
    test_format(V{L"'\\u{3000}'"}, L"{:?}", L'\x3000'); // IDEOGRAPHIC SPACE

    // Line_Separator
    test_format(V{L"'\\u{2028}'"}, L"{:?}", L'\x2028'); // LINE SEPARATOR

    // Paragraph_Separator
    test_format(V{L"'\\u{2029}'"}, L"{:?}", L'\x2029'); // PARAGRAPH SEPARATOR

    // Format
    test_format(V{L"'\\u{ad}'"}, L"{:?}", L'\xad');     // SOFT HYPHEN
    test_format(V{L"'\\u{600}'"}, L"{:?}", L'\x600');   // ARABIC NUMBER SIGN
    test_format(V{L"'\\u{feff}'"}, L"{:?}", L'\xfeff'); // ZERO WIDTH NO-BREAK SPACE

    // Incomplete surrogate pair in UTF-16
    test_format(V{L"'\\x{d800}'"}, L"{:?}", L'\xd800'); // <surrogate-D800>
    test_format(V{L"'\\x{dfff}'"}, L"{:?}", L'\xdfff'); // <surrogate-DFFF>

    // Private_Use
    test_format(V{L"'\\u{e000}'"}, L"{:?}", L'\xe000'); // <private-use-E000>
    test_format(V{L"'\\u{f8ff}'"}, L"{:?}", L'\xf8ff'); // <private-use-F8FF>

    // Unassigned
    test_format(V{L"'\\u{378}'"}, L"{:?}", L'\x378');   // <reserved-0378>
    test_format(V{L"'\\u{1774}'"}, L"{:?}", L'\x1774'); // <reserved-1774>
    test_format(V{L"'\\u{ffff}'"}, L"{:?}", L'\xffff'); // <noncharacter-FFFF>

    // Grapheme Extended
    test_format(V{L"'\\u{300}'"}, L"{:?}", L'\x300');   // COMBINING GRAVE ACCENT
    test_format(V{L"'\\u{fe20}'"}, L"{:?}", L'\xfe20'); // VARIATION SELECTOR-1
  }
#  ifndef TEST_SHORT_WCHAR
  if constexpr (sizeof(CharT) > 2) {
    static_assert(sizeof(CharT) == 4, "add support for unexpected size");
    // Unicode fitting in a 32-bit wchar_t

    // Compile-time check that wchar_t holds a value above 0xffff unchanged.
    constexpr wchar_t x       = 0x1ffff;
    constexpr std::uint32_t y = 0x1ffff;
    static_assert(x == y);

    using V = std::basic_string_view<CharT>;

    // *** Non-printable ***
    // Format
    test_format(V{L"'\\u{110bd}'"}, L"{:?}", L'\x110bd'); // KAITHI NUMBER SIGN
    test_format(V{L"'\\u{e007f}'"}, L"{:?}", L'\xe007f'); // CANCEL TAG

    // Private_Use
    test_format(V{L"'\\u{f0000}'"}, L"{:?}", L'\xf0000'); // <private-use-F0000>
    test_format(V{L"'\\u{ffffd}'"}, L"{:?}", L'\xffffd'); // <private-use-FFFFD>

    test_format(V{L"'\\u{100000}'"}, L"{:?}", L'\x100000'); // <private-use-100000>
    test_format(V{L"'\\u{10fffd}'"}, L"{:?}", L'\x10fffd'); // <private-use-10FFFD>

    // Unassigned
    test_format(V{L"'\\u{1000c}'"}, L"{:?}", L'\x1000c');   // <reserved-1000c>
    test_format(V{L"'\\u{fffff}'"}, L"{:?}", L'\xfffff');   // <noncharacter-FFFFF>
    test_format(V{L"'\\u{10fffe}'"}, L"{:?}", L'\x10fffe'); // <noncharacter-10FFFE>

    // Grapheme Extended
    test_format(V{L"'\\u{101fd}'"}, L"{:?}", L'\x101fd'); // COMBINING OLD PERMIC LETTER AN
    test_format(V{L"'\\u{e0100}'"}, L"{:?}", L'\xe0100'); // VARIATION SELECTOR-17

    // Ill-formed
    test_format(V{L"'\\x{110000}'"}, L"{:?}", L'\x110000');
    test_format(V{L"'\\x{ffffffff}'"}, L"{:?}", L'\xffffffff');
  }
#  endif // TEST_SHORT_WCHAR
#endif   // TEST_HAS_NO_WIDE_CHARACTERS
}
262a4800735SMark de Wever 
// Tests the debug ("{:?}") formatting of strings of type CharT.
//
// The first part uses input that is valid for every CharT, plus a branch that
// feeds ill-formed UTF-8 (for one-byte CharT) or the equivalent well-formed
// wide strings (otherwise). The parts guarded by TEST_HAS_NO_WIDE_CHARACTERS /
// TEST_SHORT_WCHAR pass wchar_t literals and are only instantiated when
// sizeof(CharT) is larger than 1 (respectively larger than 2).
//
// Code points that are invisible or combining (ZERO WIDTH JOINER, VARIATION
// SELECTOR-16, combining marks, emoji modifiers) are written as
// universal-character-names so that the expected text cannot be silently
// altered by editors or tools that normalize or re-encode the file.
template <class CharT>
void test_string() {
  // *** P2286 examples ***
  test_format(SV("[h\tllo]"), SV("[{}]"), SV("h\tllo"));
  test_format(SV(R"(["h\tllo"])"), SV("[{:?}]"), SV("h\tllo"));
  test_format(SV(R"(["Спасибо, Виктор ♥!"])"), SV("[{:?}]"), SV("Спасибо, Виктор ♥!"));

  test_format(SV(R"(["\u{0} \n \t \u{2} \u{1b}"])"), SV("[{:?}]"), SV("\0 \n \t \x02 \x1b"));

  if constexpr (sizeof(CharT) == 1) {
    // Ill-formed UTF-8
    test_format(SV(R"(["\x{c3}"])"), SV("[{:?}]"), "\xc3");
    test_format(SV(R"(["\x{c3}("])"), SV("[{:?}]"), "\xc3\x28");

    /* U+0000..U+007F 1 code unit range, encoded in 2 code units. */
    test_format(SV(R"(["\x{c0}\x{80}"])"), SV("[{:?}]"), "\xc0\x80"); // U+0000
    test_format(SV(R"(["\x{c1}\x{bf}"])"), SV("[{:?}]"), "\xc1\xbf"); // U+007F
    test_format(SV(R"(["\u{80}"])"), SV("[{:?}]"), "\xc2\x80");       // U+0080 first valid (General_Category=Control)

    /* U+0000..U+07FF 1 and 2 code unit range, encoded in 3 code units. */
    test_format(SV(R"(["\x{e0}\x{80}\x{80}"])"), SV("[{:?}]"), "\xe0\x80\x80"); // U+0000
    test_format(SV(R"(["\x{e0}\x{81}\x{bf}"])"), SV("[{:?}]"), "\xe0\x81\xbf"); // U+007F
    test_format(SV(R"(["\x{e0}\x{82}\x{80}"])"), SV("[{:?}]"), "\xe0\x82\x80"); // U+0080
    test_format(SV(R"(["\x{e0}\x{9f}\x{bf}"])"), SV("[{:?}]"), "\xe0\x9f\xbf"); // U+07FF
    test_format(SV("[\"\u0800\"]"), SV("[{:?}]"), "\xe0\xa0\x80");              // U+0800 first valid

#if 0
    // This code point is in the Hangul Jamo Extended-B block and at the time of writing
    // it's unassigned. When it becomes defined, this branch might become true.
    test_format(SV("[\"\ud7ff\"]"), SV("[{:?}]"), "\xed\x9f\xbf");              // U+D7FF last valid
#else
    /* U+D800..U+DFFF surrogate range */
    test_format(SV(R"(["\u{d7ff}"])"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid
#endif
    test_format(SV(R"(["\x{ed}\x{a0}\x{80}"])"), SV("[{:?}]"), "\xed\xa0\x80"); // U+D800
    test_format(SV(R"(["\x{ed}\x{af}\x{bf}"])"), SV("[{:?}]"), "\xed\xaf\xbf"); // U+DBFF
    test_format(SV(R"(["\x{ed}\x{bf}\x{80}"])"), SV("[{:?}]"), "\xed\xbf\x80"); // U+DFC0
    test_format(SV(R"(["\x{ed}\x{bf}\x{bf}"])"), SV("[{:?}]"), "\xed\xbf\xbf"); // U+DFFF
    test_format(SV(R"(["\u{e000}"])"), SV("[{:?}]"), "\xee\x80\x80");           // U+E000 first valid
                                                                                // (in the Private Use Area block)

    /* U+0000..U+FFFF 1, 2, and 3 code unit range, encoded in 4 code units. */
    test_format(SV(R"(["\x{f0}\x{80}\x{80}\x{80}"])"), SV("[{:?}]"), "\xf0\x80\x80\x80"); // U+0000
    test_format(SV(R"(["\x{f0}\x{80}\x{81}\x{bf}"])"), SV("[{:?}]"), "\xf0\x80\x81\xbf"); // U+007F
    test_format(SV(R"(["\x{f0}\x{80}\x{82}\x{80}"])"), SV("[{:?}]"), "\xf0\x80\x82\x80"); // U+0080
    test_format(SV(R"(["\x{f0}\x{80}\x{9f}\x{bf}"])"), SV("[{:?}]"), "\xf0\x80\x9f\xbf"); // U+07FF
    test_format(SV(R"(["\x{f0}\x{80}\x{a0}\x{80}"])"), SV("[{:?}]"), "\xf0\x80\xa0\x80"); // U+0800
    test_format(SV(R"(["\x{f0}\x{8f}\x{bf}\x{bf}"])"), SV("[{:?}]"), "\xf0\x8f\xbf\xbf"); // U+FFFF
    test_format(SV("[\"\U00010000\"]"), SV("[{:?}]"), "\xf0\x90\x80\x80");                // U+10000 first valid

    /* Values above U+10FFFF: invalid range */
    test_format(SV(R"(["\u{10ffff}"])"), SV("[{:?}]"), "\xf4\x8f\xbf\xbf"); // U+10FFFF last valid
                                                                            // (in Supplementary Private Use Area-B)
    test_format(SV(R"(["\x{f4}\x{90}\x{80}\x{80}"])"), SV("[{:?}]"), "\xf4\x90\x80\x80"); // U+110000
    test_format(SV(R"(["\x{f4}\x{bf}\x{bf}\x{bf}"])"), SV("[{:?}]"), "\xf4\xbf\xbf\xbf"); // U+13FFFF
  } else {
    // Valid UTF-16 and UTF-32
    test_format(SV("[\"\u00c3\"]"), SV("[{:?}]"), L"\xc3"); // LATIN CAPITAL LETTER A WITH TILDE
    test_format(SV("[\"\u00c3(\"]"), SV("[{:?}]"), L"\xc3\x28");
  }

  // LWG-3965
  // Input: U+1F937 SHRUG, U+1F3FB EMOJI MODIFIER FITZPATRICK TYPE-1-2,
  //        U+200D ZERO WIDTH JOINER, U+2642 MALE SIGN, U+FE0F VARIATION SELECTOR-16.
  // Only the ZERO WIDTH JOINER is expected to be escaped.
  test_format(SV("[\"\U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\"]"),
              SV("[{:?}]"),
              SV("\U0001f937\U0001f3fb\u200d\u2642\ufe0f"));

  // *** Special cases ***
  test_format(SV(R"("\t\n\r\\'\" ")"), SV("{:?}"), SV("\t\n\r\\'\" "));

  // *** Printable ***
  test_format(SV(R"("abcdefg")"), SV("{:?}"), SV("abcdefg"));

  // *** Non-printable ***

  // Control
  test_format(SV(R"("\u{0}\u{1f}")"), SV("{:?}"), SV("\0\x1f"));

  // Ill-formed
  if constexpr (sizeof(CharT) == 1)
    test_format(SV(R"("\x{80}")"), SV("{:?}"), SV("\x80"));

  // *** P2713R1 examples ***
  test_format(SV(R"(["\u{301}"])"), SV("[{:?}]"), SV("\u0301"));
  test_format(SV(R"(["\\\u{301}"])"), SV("[{:?}]"), SV("\\\u0301"));
  // Combining marks following a non-escaped character are expected to be
  // written unescaped, in the order they appear in the input.
  test_format(SV("[\"e\u0301\u0323\"]"), SV("[{:?}]"), SV("e\u0301\u0323"));

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  if constexpr (sizeof(CharT) > 1) {
    using V = std::basic_string_view<CharT>;

    // Unicode fitting in a 16-bit wchar_t

    // *** Non-printable ***

    // Space_Separator
    test_format(V{LR"("\u{a0}\u{3000}")"}, L"{:?}", L"\xa0\x3000");

    // Line_Separator
    test_format(V{LR"("\u{2028}")"}, L"{:?}", L"\x2028"); // LINE SEPARATOR

    // Paragraph_Separator
    test_format(V{LR"("\u{2029}")"}, L"{:?}", L"\x2029"); // PARAGRAPH SEPARATOR

    // Format
    test_format(V{LR"("\u{ad}\u{600}\u{feff}")"}, L"{:?}", L"\xad\x600\xfeff");

    // Incomplete surrogate pair in UTF-16
    test_format(V{LR"("\x{d800}")"}, L"{:?}", L"\xd800");

    // Private_Use
    test_format(V{LR"("\u{e000}\u{f8ff}")"}, L"{:?}", L"\xe000\xf8ff");

    // Unassigned
    test_format(V{LR"("\u{378}\u{1774}\u{ffff}")"}, L"{:?}", L"\x378\x1774\xffff");

    // Grapheme Extended
    test_format(V{LR"("\u{300}\u{fe20}")"}, L"{:?}", L"\x300\xfe20");
  }
#  ifndef TEST_SHORT_WCHAR
  if constexpr (sizeof(CharT) > 2) {
    static_assert(sizeof(CharT) == 4, "add support for unexpected size");
    // Unicode fitting in a 32-bit wchar_t

    // Compile-time check that wchar_t holds a value above 0xffff unchanged.
    constexpr wchar_t x       = 0x1ffff;
    constexpr std::uint32_t y = 0x1ffff;
    static_assert(x == y);

    using V = std::basic_string_view<CharT>;

    // *** Non-printable ***
    // Format
    test_format(V{LR"("\u{110bd}\u{e007f}")"}, L"{:?}", L"\x110bd\xe007f");

    // Private_Use
    test_format(V{LR"("\u{f0000}\u{ffffd}\u{100000}\u{10fffd}")"}, L"{:?}", L"\xf0000\xffffd\x100000\x10fffd");

    // Unassigned
    test_format(V{LR"("\u{1000c}\u{fffff}\u{10fffe}")"}, L"{:?}", L"\x1000c\xfffff\x10fffe");

    // Grapheme Extended
    test_format(V{LR"("\u{101fd}\u{e0100}")"}, L"{:?}", L"\x101fd\xe0100");

    // Ill-formed
    test_format(V{LR"("\x{110000}\x{ffffffff}")"}, L"{:?}", L"\x110000\xffffffff");
  }
#  endif // TEST_SHORT_WCHAR
#endif   // TEST_HAS_NO_WIDE_CHARACTERS
}
409a4800735SMark de Wever 
// Runs the width, fill/alignment and precision checks for debug-formatted
// ("?") strings through `check`, which is invoked as
// check(expected, format-string, input).
//
// Three inputs are used:
//   - "hellö" with a precomposed U+00F6 (LATIN SMALL LETTER O WITH DIAERESIS);
//   - "hello" followed by U+0308 (COMBINING DIAERESIS);
//   - "hello " followed by an emoji sequence: U+1F937 SHRUG, U+1F3FB EMOJI
//     MODIFIER FITZPATRICK TYPE-1-2, U+200D ZERO WIDTH JOINER, U+2642 MALE
//     SIGN, U+FE0F VARIATION SELECTOR-16.
//
// The combining and emoji sequences are written with
// universal-character-names so that invisible code points cannot be lost and
// the expected text cannot be normalized or re-encoded by tooling; the
// expected column counts below depend on the exact code points.
template <class CharT, class TestFunction>
void test_format_functions(TestFunction check) {
  // *** align-fill & width ***
  check(SV(R"(***"hellö")"), SV("{:*>10?}"), SV("hellö")); // ö is LATIN SMALL LETTER O WITH DIAERESIS
  check(SV(R"(*"hellö"**)"), SV("{:*^10?}"), SV("hellö"));
  check(SV(R"("hellö"***)"), SV("{:*<10?}"), SV("hellö"));

  // o followed by COMBINING DIAERESIS: the combining mark is kept as-is.
  check(SV("***\"hello\u0308\""), SV("{:*>10?}"), SV("hello\u0308"));
  check(SV("*\"hello\u0308\"**"), SV("{:*^10?}"), SV("hello\u0308"));
  check(SV("\"hello\u0308\"***"), SV("{:*<10?}"), SV("hello\u0308"));

  // Emoji sequence: only the ZERO WIDTH JOINER is escaped, as \u{200d}.
  check(SV("***\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\""),
        SV("{:*>22?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("*\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\"**"),
        SV("{:*^22?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\"***"),
        SV("{:*<22?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));

  // *** width ***
  check(SV(R"("hellö"   )"), SV("{:10?}"), SV("hellö"));
  check(SV("\"hello\u0308\"   "), SV("{:10?}"), SV("hello\u0308"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\"   "),
        SV("{:22?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));

  // *** precision ***
  check(SV(R"("hell)"), SV("{:.5?}"), SV("hellö"));
  check(SV(R"("hellö)"), SV("{:.6?}"), SV("hellö"));
  check(SV(R"("hellö")"), SV("{:.7?}"), SV("hellö"));

  check(SV(R"("hello )"), SV("{:.7?}"), SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV(R"("hello )"), SV("{:.8?}"), SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f")); // shrug is two columns
  check(SV("\"hello \U0001f937\U0001f3fb"), SV("{:.9?}"), SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\"), SV("{:.10?}"), SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}"),
        SV("{:.17?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f"),
        SV("{:.18?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\""),
        SV("{:.19?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));

  // *** width & precision ***
  check(SV(R"("hell#########################)"), SV("{:#<30.5?}"), SV("hellö"));
  check(SV(R"("hellö########################)"), SV("{:#<30.6?}"), SV("hellö"));
  check(SV(R"("hellö"#######################)"), SV("{:#<30.7?}"), SV("hellö"));

  check(SV(R"("hello #######################)"),
        SV("{:#<30.7?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV(R"("hello #######################)"),
        SV("{:#<30.8?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb#####################"),
        SV("{:#<30.9?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\####################"),
        SV("{:#<30.10?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}#############"),
        SV("{:#<30.17?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f############"),
        SV("{:#<30.18?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
  check(SV("\"hello \U0001f937\U0001f3fb\\u{200d}\u2642\ufe0f\"###########"),
        SV("{:#<30.19?}"),
        SV("hello \U0001f937\U0001f3fb\u200d\u2642\ufe0f"));
}
456a4800735SMark de Wever 
// Runs every test in this file for the character type CharT: the character
// and string debug-format tests, followed by the width/precision tests once
// for each of the four formatting-function checkers (std::format,
// std::format_to, std::formatted_size and std::format_to_n).
template <class CharT>
void test() {
  test_char<CharT>();
  test_string<CharT>();

  test_format_functions<CharT>(test_format);
  test_format_functions<CharT>(test_format_to);
  test_format_functions<CharT>(test_formatted_size);
  test_format_functions<CharT>(test_format_to_n);
}
467a4800735SMark de Wever 
// Debug-formats ("{:?}") narrow strings containing ill-formed UTF-8 and checks
// the escaped output. In every expectation below the code units of an
// ill-formed sequence appear individually as \x{..} escapes, while a
// well-formed code point that needs escaping appears as \u{..}.
static void test_ill_formed_utf8() {
  using namespace std::string_view_literals;

  // Truncated sequences: the input ends before the lead byte (0xdf, 0xef or
  // 0xf7) has received all of its continuation bytes.
  test_format(R"("\x{df}")"sv, "{:?}", "\xdf");
  test_format(R"("\x{ef}")"sv, "{:?}", "\xef");
  test_format(R"("\x{ef}\x{bf}")"sv, "{:?}", "\xef\xbf");
  test_format(R"("\x{f7}")"sv, "{:?}", "\xf7");
  test_format(R"("\x{f7}\x{bf}")"sv, "{:?}", "\xf7\xbf");
  test_format(R"("\x{f7}\x{bf}\x{bf}")"sv, "{:?}", "\xf7\xbf\xbf");

  // Broken sequences: an 'a' appears where a continuation byte is required.
  // The 'a' is written as a separate adjacent literal because a hexadecimal
  // escape would otherwise absorb it as an additional hex digit.
  test_format(R"("\x{df}a")"sv, "{:?}", "\xdf" "a");
  test_format(R"("\x{ef}a")"sv, "{:?}", "\xef" "a");
  test_format(R"("\x{ef}\x{bf}a")"sv, "{:?}", "\xef\xbf" "a");
  test_format(R"("\x{f7}a")"sv, "{:?}", "\xf7" "a");
  test_format(R"("\x{f7}\x{bf}a")"sv, "{:?}", "\xf7\xbf" "a");
  test_format(R"("\x{f7}\x{bf}\x{bf}a")"sv, "{:?}", "\xf7\xbf\xbf" "a");

  // Several incomplete sequences back to back, surrounded by valid ASCII.
  test_format(R"("a\x{f1}\x{80}\x{80}\x{e1}\x{80}\x{c2}b")"sv, "{:?}", "a" "\xf1\x80\x80\xe1\x80\xc2" "b");

  // Encodings at and beyond the upper end of the Unicode code point range.
  test_format(R"("\u{10ffff}")"sv, "{:?}", "\xf4\x8f\xbf\xbf");               // last valid code point
  test_format(R"("\x{f4}\x{90}\x{80}\x{80}")"sv, "{:?}", "\xf4\x90\x80\x80"); // first invalid code point
  test_format(R"("\x{f5}\x{b1}\x{b2}\x{b3}")"sv, "{:?}", "\xf5\xb1\xb2\xb3");
  test_format(R"("\x{f7}\x{bf}\x{bf}\x{bf}")"sv, "{:?}", "\xf7\xbf\xbf\xbf"); // largest encoded code point
}
517a4800735SMark de Wever 
518a4800735SMark de Wever #ifndef TEST_HAS_NO_WIDE_CHARACTERS
51969a10e0eSStephan T. Lavavej #  ifdef TEST_SHORT_WCHAR
// Debug-formats ("{:?}") wide strings containing unpaired UTF-16 surrogates
// and checks that each lone surrogate is written as a \x{....} escape. Only
// compiled when TEST_SHORT_WCHAR is defined.
static void test_ill_formed_utf16() {
  using namespace std::string_view_literals;

  // The input consists of nothing but a high surrogate, so the pair is never
  // completed.
  test_format(LR"("\x{d800}")"sv, L"{:?}", L"\xd800");
  test_format(LR"("\x{dbff}")"sv, L"{:?}", L"\xdbff");

  // A low surrogate with no preceding high surrogate, followed by 'a'. The
  // 'a' is a separate adjacent literal so that the hexadecimal escape does
  // not absorb it as another hex digit.
  test_format(LR"("\x{dc00}a")"sv, L"{:?}", L"\xdc00" "a");
  test_format(LR"("\x{dfff}a")"sv, L"{:?}", L"\xdfff" "a");

  // A high surrogate followed by 'a' rather than by a low surrogate.
  test_format(LR"("\x{d800}a")"sv, L"{:?}", L"\xd800" "a");
  test_format(LR"("\x{dbff}a")"sv, L"{:?}", L"\xdbff" "a");
}
54769a10e0eSStephan T. Lavavej #  else // TEST_SHORT_WCHAR
// Debug-formats ("{:?}") single wide characters at and beyond the upper end
// of the Unicode code point range. The last valid code point is expected as a
// \u{..} escape, the out-of-range values as \x{..} escapes. Only compiled
// when TEST_SHORT_WCHAR is not defined.
static void test_ill_formed_utf32() {
  using namespace std::string_view_literals;

  // last valid code point
  test_format(LR"("\u{10ffff}")"sv, L"{:?}", L"\x10ffff");
  // first invalid code point
  test_format(LR"("\x{110000}")"sv, L"{:?}", L"\x110000");
  // largest encoded code point
  test_format(LR"("\x{ffffffff}")"sv, L"{:?}", L"\xffffffff");
}
555a4800735SMark de Wever 
55669a10e0eSStephan T. Lavavej #  endif // TEST_SHORT_WCHAR
557a4800735SMark de Wever #endif   // TEST_HAS_NO_WIDE_CHARACTERS
558a4800735SMark de Wever 
// Test driver: runs the generic suite for char (and for wchar_t when wide
// characters are available), then the ill-formed input tests. The wide
// ill-formed test that is run depends on whether TEST_SHORT_WCHAR is defined.
int main(int, char**) {
  test<char>();
#if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
  test<wchar_t>();
#endif

  test_ill_formed_utf8();
#if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
#  if defined(TEST_SHORT_WCHAR)
  test_ill_formed_utf16();
#  else
  test_ill_formed_utf32();
#  endif // defined(TEST_SHORT_WCHAR)
#endif   // !defined(TEST_HAS_NO_WIDE_CHARACTERS)

  return 0;
}
576