//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME

// This version runs the test when the platform has Unicode support.
// UNSUPPORTED: libcpp-has-no-unicode

// TODO FMT This test should not require std::to_chars(floating-point)
// XFAIL: availability-fp_to_chars-missing

// <format>

// Tests the Unicode width support of the standard format specifiers.
// It tests [format.string.std]/8 - 11:
// - Properly determining the estimated width of a unicode string.
// - Properly truncating to the wanted maximum width.

// More specific extended grapheme cluster boundary rules are tested in
// test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
// this test is based on test data provided by the Unicode Consortium.

#include <format>
#include <cassert>
#include <vector>

#include "make_string.h"
#include "test_macros.h"
#include "string_literal.h"
#include "test_format_string.h"

#ifndef TEST_HAS_NO_LOCALIZATION
#  include <iostream>
#  include <type_traits>
#endif

// Builds a string view of the character type under test from a narrow literal.
// Relies on a template parameter named CharT being in scope at the use site.
#define SV(S) MAKE_STRING_VIEW(CharT, S)

// Formats `args` with `fmt` through std::format and asserts that the result
// equals `expected`.
//
// When localization is available and CharT is char, a mismatch is first
// reported on std::cerr (format string, expected and actual output) so the
// failing case can be identified before the assert fires.
template <class CharT, class... Args>
void check(std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
  std::basic_string<CharT> out = std::format(fmt, std::forward<Args>(args)...);
#ifndef TEST_HAS_NO_LOCALIZATION
  if constexpr (std::same_as<CharT, char>)
    if (out != expected)
      std::cerr << "\nFormat string " << fmt.get() << "\nExpected output " << expected << "\nActual output " << out
                << '\n';
#endif
  assert(out == expected);
}

// Centers a single code point in a '*'-filled field.
// Code points expected to occupy one column use width 3, code points expected
// to occupy two columns use width 4; in both cases the expected output has
// exactly one fill character on each side.
template <class CharT>
static void test_single_code_point_fill() {
  //*** 1-byte code points ***
  check(SV("* *"), SV("{:*^3}"), SV(" "));
  check(SV("*~*"), SV("{:*^3}"), SV("~"));

  //*** 2-byte code points ***
  check(SV("*\u00a1*"), SV("{:*^3}"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  check(SV("*\u07ff*"), SV("{:*^3}"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  check(SV("*\u0800*"), SV("{:*^3}"), SV("\u0800")); // SAMARITAN LETTER ALAF
  check(SV("*\ufffd*"), SV("{:*^3}"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  check(SV("*\u1100*"), SV("{:*^4}"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  check(SV("*\u115f*"), SV("{:*^4}"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  check(SV("*\u2329*"), SV("{:*^4}"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  check(SV("*\u232a*"), SV("{:*^4}"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  check(SV("*\u2e80*"), SV("{:*^4}"), SV("\u2e80")); // CJK RADICAL REPEAT
  check(SV("*\u303e*"), SV("{:*^4}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  check(SV("*\u3041*"), SV("{:*^4}"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A
  check(SV("*\ua4d0*"), SV("{:*^3}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA

  check(SV("*\uac00*"), SV("{:*^4}"), SV("\uac00")); // <Hangul Syllable, First>
  check(SV("*\ud7a3*"), SV("{:*^4}"), SV("\ud7a3")); // Hangul Syllable Hih

  check(SV("*\uf900*"), SV("{:*^4}"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  check(SV("*\ufaff*"), SV("{:*^4}"), SV("\ufaff")); // U+FAFF, last code point before U+FB00 LATIN SMALL LIGATURE FF

  check(SV("*\ufe10*"), SV("{:*^4}"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  check(SV("*\ufe19*"), SV("{:*^4}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  check(SV("*\ufe30*"), SV("{:*^4}"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  check(SV("*\ufe70*"), SV("{:*^3}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  check(SV("*\uff01*"), SV("{:*^4}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  check(SV("*\uff60*"), SV("{:*^4}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  check(SV("*\uffe0*"), SV("{:*^4}"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  check(SV("*\uffe6*"), SV("{:*^4}"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  check(SV("*\U00010000*"), SV("{:*^3}"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  check(SV("*\U0010FFFF*"), SV("{:*^3}"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  check(SV("*\U0001f300*"), SV("{:*^4}"), SV("\U0001f300")); // CYCLONE
  check(SV("*\U0001f64f*"), SV("{:*^4}"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  check(SV("*\U0001f900*"), SV("{:*^4}"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  check(SV("*\U0001f9ff*"), SV("{:*^4}"), SV("\U0001f9ff")); // NAZAR AMULET
  check(SV("*\U00020000*"), SV("{:*^4}"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  check(SV("*\U0002fffd*"), SV("{:*^4}"), SV("\U0002fffd")); // Undefined Character
  check(SV("*\U00030000*"), SV("{:*^4}"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  check(SV("*\U0003fffd*"), SV("{:*^4}"), SV("\U0003fffd")); // Undefined Character
}

// One column output is unaffected.
// Two column output is removed, thus the result is only the fill character.
template <class CharT>
static void test_single_code_point_truncate() {
  // Every case centers in a width-3 '*'-filled field with precision 1.
  // Inputs expected to take one column survive; inputs expected to take two
  // columns are dropped entirely, leaving three fill characters.

  //*** 1-byte code points ***
  check(SV("* *"), SV("{:*^3.1}"), SV(" "));
  check(SV("*~*"), SV("{:*^3.1}"), SV("~"));

  //*** 2-byte code points ***
  check(SV("*\u00a1*"), SV("{:*^3.1}"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  check(SV("*\u07ff*"), SV("{:*^3.1}"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  check(SV("*\u0800*"), SV("{:*^3.1}"), SV("\u0800")); // SAMARITAN LETTER ALAF
  check(SV("*\ufffd*"), SV("{:*^3.1}"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  check(SV("***"), SV("{:*^3.1}"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  check(SV("***"), SV("{:*^3.1}"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  check(SV("***"), SV("{:*^3.1}"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  check(SV("***"), SV("{:*^3.1}"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  check(SV("***"), SV("{:*^3.1}"), SV("\u2e80")); // CJK RADICAL REPEAT
  check(SV("***"), SV("{:*^3.1}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  check(SV("***"), SV("{:*^3.1}"), SV("\u3041"));      // U+3041 HIRAGANA LETTER SMALL A
  check(SV("*\ua4d0*"), SV("{:*^3.1}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA

  check(SV("***"), SV("{:*^3.1}"), SV("\uac00")); // <Hangul Syllable, First>
  check(SV("***"), SV("{:*^3.1}"), SV("\ud7a3")); // Hangul Syllable Hih

  check(SV("***"), SV("{:*^3.1}"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  check(SV("***"), SV("{:*^3.1}"), SV("\ufaff")); // U+FAFF, last code point before U+FB00 LATIN SMALL LIGATURE FF

  check(SV("***"), SV("{:*^3.1}"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  check(SV("***"), SV("{:*^3.1}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  check(SV("***"), SV("{:*^3.1}"), SV("\ufe30"));      // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  check(SV("*\ufe70*"), SV("{:*^3.1}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  check(SV("***"), SV("{:*^3.1}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  check(SV("***"), SV("{:*^3.1}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  check(SV("***"), SV("{:*^3.1}"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  check(SV("***"), SV("{:*^3.1}"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  check(SV("*\U00010000*"), SV("{:*^3.1}"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  check(SV("*\U0010FFFF*"), SV("{:*^3.1}"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f300")); // CYCLONE
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f9ff")); // NAZAR AMULET
  check(SV("***"), SV("{:*^3.1}"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  check(SV("***"), SV("{:*^3.1}"), SV("\U0002fffd")); // Undefined Character
  check(SV("***"), SV("{:*^3.1}"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  check(SV("***"), SV("{:*^3.1}"), SV("\U0003fffd")); // Undefined Character
}

// The examples used in the P1868 paper.
template <class CharT>
static void test_P1868() {
  // Three passes over the same inputs: plain centering, truncation to one
  // column and truncation to two columns.
  //
  // NOTE(review): U+1F921 is labelled "UNICORN FACE" below, but the Unicode
  // charts appear to name that code point CLOWN FACE (UNICORN FACE being
  // U+1F984). The labels are kept as found; confirm against the paper.

  // Fill
  check(SV("*\u0041*"), SV("{:*^3}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check(SV("*\u00c1*"), SV("{:*^3}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check(SV("*\u0041\u0301*"),
        SV("{:*^3}"),
        SV("\u0041\u0301"));                         // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check(SV("*\u0132*"), SV("{:*^3}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check(SV("*\u0394*"), SV("{:*^3}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check(SV("*\u0429*"), SV("{:*^3}"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check(SV("*\u05d0*"), SV("{:*^3}"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check(SV("*\u0634*"), SV("{:*^3}"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check(SV("*\u3009*"), SV("{:*^4}"), SV("\u3009"));         // { RIGHT-POINTING ANGLE BRACKET }
  check(SV("*\u754c*"), SV("{:*^4}"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check(SV("*\U0001f921*"), SV("{:*^4}"), SV("\U0001f921")); // { UNICORN FACE }
  check(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
        SV("{:*^4}"),
        SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 1 column: 1 column grapheme clusters are kept together.
  check(SV("*\u0041*"), SV("{:*^3.1}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check(SV("*\u00c1*"), SV("{:*^3.1}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check(SV("*\u0041\u0301*"),
        SV("{:*^3.1}"),
        SV("\u0041\u0301"));                           // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check(SV("*\u0132*"), SV("{:*^3.1}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check(SV("*\u0394*"), SV("{:*^3.1}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check(SV("*\u0429*"), SV("{:*^3.1}"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
  check(SV("*\u05d0*"), SV("{:*^3.1}"), SV("\u05d0")); // { HEBREW LETTER ALEF }
  check(SV("*\u0634*"), SV("{:*^3.1}"), SV("\u0634")); // { ARABIC LETTER SHEEN }
  check(SV("***"), SV("{:*^3.1}"), SV("\u3009"));      // { RIGHT-POINTING ANGLE BRACKET }
  check(SV("***"), SV("{:*^3.1}"), SV("\u754c"));      // { CJK Unified Ideograph-754C }
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f921"));  // { UNICORN FACE }
  check(SV("***"),
        SV("{:*^3.1}"),
        SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 2 column: 2 column grapheme clusters are kept together.
  check(SV("*\u0041*"), SV("{:*^3.2}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check(SV("*\u00c1*"), SV("{:*^3.2}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check(SV("*\u0041\u0301*"),
        SV("{:*^3.2}"),
        SV("\u0041\u0301"));                           // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check(SV("*\u0132*"), SV("{:*^3.2}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check(SV("*\u0394*"), SV("{:*^3.2}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check(SV("*\u0429*"), SV("{:*^3.2}"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check(SV("*\u05d0*"), SV("{:*^3.2}"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check(SV("*\u0634*"), SV("{:*^3.2}"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check(SV("*\u3009*"), SV("{:*^4.2}"), SV("\u3009"));         // { RIGHT-POINTING ANGLE BRACKET }
  check(SV("*\u754c*"), SV("{:*^4.2}"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check(SV("*\U0001f921*"), SV("{:*^4.2}"), SV("\U0001f921")); // { UNICORN FACE }
  check(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
        SV("{:*^4.2}"),
        SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
}

#ifdef _LIBCPP_VERSION
// Tests the libc++ specific behaviour for malformed UTF-sequences. The
// Standard doesn't specify how to handle this.
//
// In the padded cases below the field width is the number of code units in
// the input plus two, and the expected output has one fill character on each
// side. The "{}" cases expect the input to be passed through unchanged.
template <class CharT>
static void test_malformed_code_point() {
  if constexpr (sizeof(CharT) == 1) {
    // Malformed at end.
    check(SV("*ZZZZ\x8f*"), SV("{:*^7}"), SV("ZZZZ\x8f"));
    check(SV("*ZZZZ\xcf*"), SV("{:*^7}"), SV("ZZZZ\xcf"));
    check(SV("*ZZZZ\xef*"), SV("{:*^7}"), SV("ZZZZ\xef"));
    check(SV("*ZZZZ\xff*"), SV("{:*^7}"), SV("ZZZZ\xff"));

    // Malformed in middle, no continuation
    check(SV("*ZZZZ\x8fZ*"), SV("{:*^8}"), SV("ZZZZ\x8fZ"));
    check(SV("*ZZZZ\xcfZ*"), SV("{:*^8}"), SV("ZZZZ\xcfZ"));
    check(SV("*ZZZZ\xefZ*"), SV("{:*^8}"), SV("ZZZZ\xefZ"));
    check(SV("*ZZZZ\xffZ*"), SV("{:*^8}"), SV("ZZZZ\xffZ"));

    check(SV("*ZZZZ\x8fZZ*"), SV("{:*^9}"), SV("ZZZZ\x8fZZ"));
    check(SV("*ZZZZ\xcfZZ*"), SV("{:*^9}"), SV("ZZZZ\xcfZZ"));
    check(SV("*ZZZZ\xefZZ*"), SV("{:*^9}"), SV("ZZZZ\xefZZ"));
    check(SV("*ZZZZ\xffZZ*"), SV("{:*^9}"), SV("ZZZZ\xffZZ"));

    check(SV("*ZZZZ\x8fZZZ*"), SV("{:*^10}"), SV("ZZZZ\x8fZZZ"));
    check(SV("*ZZZZ\xcfZZZ*"), SV("{:*^10}"), SV("ZZZZ\xcfZZZ"));
    check(SV("*ZZZZ\xefZZZ*"), SV("{:*^10}"), SV("ZZZZ\xefZZZ"));
    check(SV("*ZZZZ\xffZZZ*"), SV("{:*^10}"), SV("ZZZZ\xffZZZ"));

    check(SV("*ZZZZ\x8fZZZZ*"), SV("{:*^11}"), SV("ZZZZ\x8fZZZZ"));
    check(SV("*ZZZZ\xcfZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xcfZZZZ"));
    check(SV("*ZZZZ\xefZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xefZZZZ"));
    check(SV("*ZZZZ\xffZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xffZZZZ"));

    // Invalid continuations
    check(SV("\xc2\x00"), SV("{}"), SV("\xc2\x00"));
    check(SV("\xc2\x40"), SV("{}"), SV("\xc2\x40"));
    check(SV("\xc2\xc0"), SV("{}"), SV("\xc2\xc0"));

    check(SV("\xe0\x00\x80"), SV("{}"), SV("\xe0\x00\x80"));
    check(SV("\xe0\x40\x80"), SV("{}"), SV("\xe0\x40\x80"));
    check(SV("\xe0\xc0\x80"), SV("{}"), SV("\xe0\xc0\x80"));

    check(SV("\xe0\x80\x00"), SV("{}"), SV("\xe0\x80\x00"));
    check(SV("\xe0\x80\x40"), SV("{}"), SV("\xe0\x80\x40"));
    check(SV("\xe0\x80\xc0"), SV("{}"), SV("\xe0\x80\xc0"));

    check(SV("\xf0\x80\x80\x00"), SV("{}"), SV("\xf0\x80\x80\x00"));
    check(SV("\xf0\x80\x80\x40"), SV("{}"), SV("\xf0\x80\x80\x40"));
    check(SV("\xf0\x80\x80\xc0"), SV("{}"), SV("\xf0\x80\x80\xc0"));

    check(SV("\xf0\x80\x00\x80"), SV("{}"), SV("\xf0\x80\x00\x80"));
    check(SV("\xf0\x80\x40\x80"), SV("{}"), SV("\xf0\x80\x40\x80"));
    check(SV("\xf0\x80\xc0\x80"), SV("{}"), SV("\xf0\x80\xc0\x80"));

    check(SV("\xf0\x00\x80\x80"), SV("{}"), SV("\xf0\x00\x80\x80"));
    check(SV("\xf0\x40\x80\x80"), SV("{}"), SV("\xf0\x40\x80\x80"));
    check(SV("\xf0\xc0\x80\x80"), SV("{}"), SV("\xf0\xc0\x80\x80"));

    // Premature end.
    check(SV("*ZZZZ\xef\xf5*"), SV("{:*^8}"), SV("ZZZZ\xef\xf5"));
    check(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("{:*^12}"), SV("ZZZZ\xef\xf5ZZZZ"));
    check(SV("*ZZZZ\xff\xf5\xf5*"), SV("{:*^9}"), SV("ZZZZ\xff\xf5\xf5"));
    check(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("{:*^13}"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));

  } else if constexpr (sizeof(CharT) == 2) {
    // TODO FMT Add these tests.
  }
  // UTF-32 doesn't combine characters, thus no corruption tests.
}
#endif

// Runs every test group for one character type. The malformed-input group is
// only run when building against libc++.
template <class CharT>
static void test() {
  test_single_code_point_fill<CharT>();
  test_single_code_point_truncate<CharT>();
  test_P1868<CharT>();

#ifdef _LIBCPP_VERSION
  test_malformed_code_point<CharT>();
#endif
}

// Entry point: runs the suite for char and, unless wide characters are
// disabled, for wchar_t.
int main(int, char**) {
  test<char>();

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  test<wchar_t>();
#endif

  return 0;
}