xref: /llvm-project/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp (revision 6a54dfbfe534276d644d7f9c027f0deeb748dd53)
1857a78c0SMark de Wever //===----------------------------------------------------------------------===//
2*6a54dfbfSLouis Dionne //
3857a78c0SMark de Wever // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4857a78c0SMark de Wever // See https://llvm.org/LICENSE.txt for license information.
5857a78c0SMark de Wever // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6857a78c0SMark de Wever //
7857a78c0SMark de Wever //===----------------------------------------------------------------------===//
8857a78c0SMark de Wever 
9857a78c0SMark de Wever // UNSUPPORTED: c++03, c++11, c++14, c++17
10520c7fbbSLouis Dionne // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
11857a78c0SMark de Wever 
12857a78c0SMark de Wever // This version runs the test when the platform has Unicode support.
13857a78c0SMark de Wever // UNSUPPORTED: libcpp-has-no-unicode
14857a78c0SMark de Wever 
153d334df5SLouis Dionne // TODO FMT This test should not require std::to_chars(floating-point)
16f0fc8c48SLouis Dionne // XFAIL: availability-fp_to_chars-missing
173d334df5SLouis Dionne 
18857a78c0SMark de Wever // <format>
19857a78c0SMark de Wever 
20857a78c0SMark de Wever // Tests the Unicode width support of the standard format specifiers.
21857a78c0SMark de Wever // It tests [format.string.std]/8 - 11:
// - Properly determining the estimated width of a Unicode string.
23857a78c0SMark de Wever // - Properly truncating to the wanted maximum width.
24857a78c0SMark de Wever 
25857a78c0SMark de Wever // More specific extended grapheme cluster boundary rules are tested in
26857a78c0SMark de Wever // test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
27857a78c0SMark de Wever // this test is based on test data provided by the Unicode Consortium.
28857a78c0SMark de Wever 
29857a78c0SMark de Wever #include <format>
30857a78c0SMark de Wever #include <cassert>
31857a78c0SMark de Wever #include <vector>
32857a78c0SMark de Wever 
33857a78c0SMark de Wever #include "make_string.h"
34857a78c0SMark de Wever #include "test_macros.h"
35857a78c0SMark de Wever #include "string_literal.h"
366195bdb9SMark de Wever #include "test_format_string.h"
37857a78c0SMark de Wever 
38857a78c0SMark de Wever #ifndef TEST_HAS_NO_LOCALIZATION
39857a78c0SMark de Wever #  include <iostream>
40857a78c0SMark de Wever #  include <type_traits>
41857a78c0SMark de Wever #endif
42857a78c0SMark de Wever 
43857a78c0SMark de Wever #define SV(S) MAKE_STRING_VIEW(CharT, S)
44857a78c0SMark de Wever 
// Formats `args` according to `fmt` and asserts that the result equals
// `expected`.
//
// When localization is available and CharT is char, a mismatch is first
// written to std::cerr (format string, expected output and actual output) so
// the failing case can be identified before the assertion fires. For other
// character types only the assertion reports the failure.
template <class CharT, class... Args>
void check(std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
  std::basic_string<CharT> out = std::format(fmt, std::forward<Args>(args)...);
#ifndef TEST_HAS_NO_LOCALIZATION
  // std::cerr is a narrow stream, so the diagnostic is limited to char.
  // std::is_same_v comes from <type_traits>, which is included together with
  // <iostream> under this same guard.
  if constexpr (std::is_same_v<CharT, char>) {
    if (out != expected) {
      std::cerr << "\nFormat string   " << fmt.get() << "\nExpected output " << expected << "\nActual output   " << out
                << '\n';
    }
  }
#endif
  assert(out == expected);
}
56857a78c0SMark de Wever 
// Centers a single code point in a field padded with '*'.
// The width is 3 for code points with an expected width of one column and 4
// for code points with an expected width of two columns, so every expected
// output has exactly one fill character on each side.
template <class CharT>
static void test_single_code_point_fill() {
  using view = std::basic_string_view<CharT>;

  // Field of 3 columns, used for 1 column code points.
  const auto one_column = [](view expected, view input) { check(expected, SV("{:*^3}"), input); };
  // Field of 4 columns, used for 2 column code points.
  const auto two_columns = [](view expected, view input) { check(expected, SV("{:*^4}"), input); };

  //*** 1-byte code points ***
  one_column(SV("* *"), SV(" "));
  one_column(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  one_column(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  one_column(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  one_column(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  one_column(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  two_columns(SV("*\u1100*"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  two_columns(SV("*\u115f*"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  two_columns(SV("*\u2329*"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  two_columns(SV("*\u232a*"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  two_columns(SV("*\u2e80*"), SV("\u2e80")); // CJK RADICAL REPEAT
  two_columns(SV("*\u303e*"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  two_columns(SV("*\u3041*"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A
  one_column(SV("*\ua4d0*"), SV("\ua4d0"));  // U+A4D0 LISU LETTER BA

  two_columns(SV("*\uac00*"), SV("\uac00")); // <Hangul Syllable, First>
  two_columns(SV("*\ud7a3*"), SV("\ud7a3")); // Hangul Syllable Hih

  two_columns(SV("*\uf900*"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  two_columns(SV("*\ufaff*"), SV("\ufaff")); // U+FAFF, last code point of the CJK COMPATIBILITY IDEOGRAPHS block

  two_columns(SV("*\ufe10*"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  two_columns(SV("*\ufe19*"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  two_columns(SV("*\ufe30*"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  one_column(SV("*\ufe70*"), SV("\ufe70"));  // U+FE70 ARABIC FATHATAN ISOLATED FORM

  two_columns(SV("*\uff01*"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  two_columns(SV("*\uff60*"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  two_columns(SV("*\uffe0*"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  two_columns(SV("*\uffe6*"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  one_column(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  one_column(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  two_columns(SV("*\U0001f300*"), SV("\U0001f300")); // CYCLONE
  two_columns(SV("*\U0001f64f*"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  two_columns(SV("*\U0001f900*"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  two_columns(SV("*\U0001f9ff*"), SV("\U0001f9ff")); // NAZAR AMULET
  two_columns(SV("*\U00020000*"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  two_columns(SV("*\U0002fffd*"), SV("\U0002fffd")); // Undefined Character
  two_columns(SV("*\U00030000*"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  two_columns(SV("*\U0003fffd*"), SV("\U0003fffd")); // Undefined Character
}
116857a78c0SMark de Wever 
117857a78c0SMark de Wever // One column output is unaffected.
118857a78c0SMark de Wever // Two column output is removed, thus the result is only the fill character.
template <class CharT>
static void test_single_code_point_truncate() {
  using view = std::basic_string_view<CharT>;

  // Every case centers the argument in 3 columns with a precision of 1 column.
  const auto truncated = [](view expected, view input) { check(expected, SV("{:*^3.1}"), input); };

  //*** 1-byte code points ***
  truncated(SV("* *"), SV(" "));
  truncated(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  truncated(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  truncated(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  truncated(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  truncated(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  truncated(SV("***"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  truncated(SV("***"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  truncated(SV("***"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  truncated(SV("***"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  truncated(SV("***"), SV("\u2e80")); // CJK RADICAL REPEAT
  truncated(SV("***"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  truncated(SV("***"), SV("\u3041"));      // U+3041 HIRAGANA LETTER SMALL A
  truncated(SV("*\ua4d0*"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA

  truncated(SV("***"), SV("\uac00")); // <Hangul Syllable, First>
  truncated(SV("***"), SV("\ud7a3")); // Hangul Syllable Hih

  truncated(SV("***"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  truncated(SV("***"), SV("\ufaff")); // U+FAFF, last code point of the CJK COMPATIBILITY IDEOGRAPHS block

  truncated(SV("***"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  truncated(SV("***"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  truncated(SV("***"), SV("\ufe30"));      // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  truncated(SV("*\ufe70*"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  truncated(SV("***"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  truncated(SV("***"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  truncated(SV("***"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  truncated(SV("***"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  truncated(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  truncated(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  truncated(SV("***"), SV("\U0001f300")); // CYCLONE
  truncated(SV("***"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  truncated(SV("***"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  truncated(SV("***"), SV("\U0001f9ff")); // NAZAR AMULET
  truncated(SV("***"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  truncated(SV("***"), SV("\U0002fffd")); // Undefined Character
  truncated(SV("***"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  truncated(SV("***"), SV("\U0003fffd")); // Undefined Character
}
178857a78c0SMark de Wever 
// Tests the examples used in paper P1868.
template <class CharT>
static void test_P1868() {
  using view = std::basic_string_view<CharT>;

  // One helper per format specification used below; the name encodes the
  // width and, when present, the precision.
  const auto width3       = [](view expected, view input) { check(expected, SV("{:*^3}"), input); };
  const auto width4       = [](view expected, view input) { check(expected, SV("{:*^4}"), input); };
  const auto width3_prec1 = [](view expected, view input) { check(expected, SV("{:*^3.1}"), input); };
  const auto width3_prec2 = [](view expected, view input) { check(expected, SV("{:*^3.2}"), input); };
  const auto width4_prec2 = [](view expected, view input) { check(expected, SV("{:*^4.2}"), input); };

  // Fill
  width3(SV("*\u0041*"), SV("\u0041"));             // { LATIN CAPITAL LETTER A }
  width3(SV("*\u00c1*"), SV("\u00c1"));             // { LATIN CAPITAL LETTER A WITH ACUTE }
  width3(SV("*\u0041\u0301*"), SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  width3(SV("*\u0132*"), SV("\u0132"));             // { LATIN CAPITAL LIGATURE IJ }
  width3(SV("*\u0394*"), SV("\u0394"));             // { GREEK CAPITAL LETTER DELTA }

  width3(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  width3(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  width3(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  width4(SV("*\u3009*"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  width4(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  width4(SV("*\U0001f921*"), SV("\U0001f921")); // { CLOWN FACE }
  width4(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
         SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 1 column: 1 column grapheme clusters are kept together.
  width3_prec1(SV("*\u0041*"), SV("\u0041"));             // { LATIN CAPITAL LETTER A }
  width3_prec1(SV("*\u00c1*"), SV("\u00c1"));             // { LATIN CAPITAL LETTER A WITH ACUTE }
  width3_prec1(SV("*\u0041\u0301*"), SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  width3_prec1(SV("*\u0132*"), SV("\u0132"));             // { LATIN CAPITAL LIGATURE IJ }
  width3_prec1(SV("*\u0394*"), SV("\u0394"));             // { GREEK CAPITAL LETTER DELTA }

  width3_prec1(SV("*\u0429*"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
  width3_prec1(SV("*\u05d0*"), SV("\u05d0")); // { HEBREW LETTER ALEF }
  width3_prec1(SV("*\u0634*"), SV("\u0634")); // { ARABIC LETTER SHEEN }
  width3_prec1(SV("***"), SV("\u3009"));      // { RIGHT ANGLE BRACKET }
  width3_prec1(SV("***"), SV("\u754c"));      // { CJK Unified Ideograph-754C }
  width3_prec1(SV("***"), SV("\U0001f921"));  // { CLOWN FACE }
  width3_prec1(SV("***"),
               SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 2 column: 2 column grapheme clusters are kept together.
  width3_prec2(SV("*\u0041*"), SV("\u0041"));             // { LATIN CAPITAL LETTER A }
  width3_prec2(SV("*\u00c1*"), SV("\u00c1"));             // { LATIN CAPITAL LETTER A WITH ACUTE }
  width3_prec2(SV("*\u0041\u0301*"), SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  width3_prec2(SV("*\u0132*"), SV("\u0132"));             // { LATIN CAPITAL LIGATURE IJ }
  width3_prec2(SV("*\u0394*"), SV("\u0394"));             // { GREEK CAPITAL LETTER DELTA }

  width3_prec2(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  width3_prec2(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  width3_prec2(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  width4_prec2(SV("*\u3009*"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  width4_prec2(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  width4_prec2(SV("*\U0001f921*"), SV("\U0001f921")); // { CLOWN FACE }
  width4_prec2(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
               SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
}
239857a78c0SMark de Wever 
240857a78c0SMark de Wever #ifdef _LIBCPP_VERSION
241857a78c0SMark de Wever // Tests the libc++ specific behaviour for malformed UTF-sequences. The
242857a78c0SMark de Wever // Standard doesn't specify how to handle this.
template <class CharT>
static void test_malformed_code_point() {
  if constexpr (sizeof(CharT) == 1) {
    using view = std::basic_string_view<CharT>;

    // Formats `input` with "{}" and expects the output to equal the input.
    const auto unchanged = [](view input) { check(input, SV("{}"), input); };

    // Malformed at end.
    check(SV("*ZZZZ\x8f*"), SV("{:*^7}"), SV("ZZZZ\x8f"));
    check(SV("*ZZZZ\xcf*"), SV("{:*^7}"), SV("ZZZZ\xcf"));
    check(SV("*ZZZZ\xef*"), SV("{:*^7}"), SV("ZZZZ\xef"));
    check(SV("*ZZZZ\xff*"), SV("{:*^7}"), SV("ZZZZ\xff"));

    // Malformed in middle, no continuation
    // ... followed by one valid character.
    check(SV("*ZZZZ\x8fZ*"), SV("{:*^8}"), SV("ZZZZ\x8fZ"));
    check(SV("*ZZZZ\xcfZ*"), SV("{:*^8}"), SV("ZZZZ\xcfZ"));
    check(SV("*ZZZZ\xefZ*"), SV("{:*^8}"), SV("ZZZZ\xefZ"));
    check(SV("*ZZZZ\xffZ*"), SV("{:*^8}"), SV("ZZZZ\xffZ"));

    // ... followed by two valid characters.
    check(SV("*ZZZZ\x8fZZ*"), SV("{:*^9}"), SV("ZZZZ\x8fZZ"));
    check(SV("*ZZZZ\xcfZZ*"), SV("{:*^9}"), SV("ZZZZ\xcfZZ"));
    check(SV("*ZZZZ\xefZZ*"), SV("{:*^9}"), SV("ZZZZ\xefZZ"));
    check(SV("*ZZZZ\xffZZ*"), SV("{:*^9}"), SV("ZZZZ\xffZZ"));

    // ... followed by three valid characters.
    check(SV("*ZZZZ\x8fZZZ*"), SV("{:*^10}"), SV("ZZZZ\x8fZZZ"));
    check(SV("*ZZZZ\xcfZZZ*"), SV("{:*^10}"), SV("ZZZZ\xcfZZZ"));
    check(SV("*ZZZZ\xefZZZ*"), SV("{:*^10}"), SV("ZZZZ\xefZZZ"));
    check(SV("*ZZZZ\xffZZZ*"), SV("{:*^10}"), SV("ZZZZ\xffZZZ"));

    // ... followed by four valid characters.
    check(SV("*ZZZZ\x8fZZZZ*"), SV("{:*^11}"), SV("ZZZZ\x8fZZZZ"));
    check(SV("*ZZZZ\xcfZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xcfZZZZ"));
    check(SV("*ZZZZ\xefZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xefZZZZ"));
    check(SV("*ZZZZ\xffZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xffZZZZ"));

    // Invalid continuations
    // 2-byte lead, invalid 2nd byte.
    unchanged(SV("\xc2\x00"));
    unchanged(SV("\xc2\x40"));
    unchanged(SV("\xc2\xc0"));

    // 3-byte lead, invalid 2nd byte.
    unchanged(SV("\xe0\x00\x80"));
    unchanged(SV("\xe0\x40\x80"));
    unchanged(SV("\xe0\xc0\x80"));

    // 3-byte lead, invalid 3rd byte.
    unchanged(SV("\xe0\x80\x00"));
    unchanged(SV("\xe0\x80\x40"));
    unchanged(SV("\xe0\x80\xc0"));

    // 4-byte lead, invalid 4th byte.
    unchanged(SV("\xf0\x80\x80\x00"));
    unchanged(SV("\xf0\x80\x80\x40"));
    unchanged(SV("\xf0\x80\x80\xc0"));

    // 4-byte lead, invalid 3rd byte.
    unchanged(SV("\xf0\x80\x00\x80"));
    unchanged(SV("\xf0\x80\x40\x80"));
    unchanged(SV("\xf0\x80\xc0\x80"));

    // 4-byte lead, invalid 2nd byte.
    unchanged(SV("\xf0\x00\x80\x80"));
    unchanged(SV("\xf0\x40\x80\x80"));
    unchanged(SV("\xf0\xc0\x80\x80"));

    // Premature end.
    check(SV("*ZZZZ\xef\xf5*"), SV("{:*^8}"), SV("ZZZZ\xef\xf5"));
    check(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("{:*^12}"), SV("ZZZZ\xef\xf5ZZZZ"));
    check(SV("*ZZZZ\xff\xf5\xf5*"), SV("{:*^9}"), SV("ZZZZ\xff\xf5\xf5"));
    check(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("{:*^13}"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));

  } else if constexpr (sizeof(CharT) == 2) {
    // TODO FMT Add these tests.
  }
  // UTF-32 doesn't combine characters, thus no corruption tests.
}
309857a78c0SMark de Wever #endif
310857a78c0SMark de Wever 
311857a78c0SMark de Wever template <class CharT>
312857a78c0SMark de Wever static void test() {
313857a78c0SMark de Wever   test_single_code_point_fill<CharT>();
314857a78c0SMark de Wever   test_single_code_point_truncate<CharT>();
315857a78c0SMark de Wever   test_P1868<CharT>();
316857a78c0SMark de Wever 
317857a78c0SMark de Wever #ifdef _LIBCPP_VERSION
318857a78c0SMark de Wever   test_malformed_code_point<CharT>();
319857a78c0SMark de Wever #endif
320857a78c0SMark de Wever }
321857a78c0SMark de Wever 
322857a78c0SMark de Wever int main(int, char**) {
323857a78c0SMark de Wever   test<char>();
324857a78c0SMark de Wever 
325857a78c0SMark de Wever #ifndef TEST_HAS_NO_WIDE_CHARACTERS
326857a78c0SMark de Wever   test<wchar_t>();
327857a78c0SMark de Wever #endif
328857a78c0SMark de Wever 
329857a78c0SMark de Wever   return 0;
330857a78c0SMark de Wever }
331