//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME

// This version runs the test when the platform has Unicode support.
// UNSUPPORTED: libcpp-has-no-unicode

// TODO FMT This test should not require std::to_chars(floating-point)
// XFAIL: availability-fp_to_chars-missing

// <format>

// Tests the Unicode width support of the standard format specifiers.
// It tests [format.string.std]/8 - 11:
// - Properly determining the estimated width of a Unicode string.
// - Properly truncating to the wanted maximum width.

// More specific extended grapheme cluster boundary rules are tested in
// test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
// this test is based on test data provided by the Unicode Consortium.
#include <format>
#include <cassert>
#include <concepts>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "make_string.h"
#include "test_macros.h"
#include "string_literal.h"
#include "test_format_string.h"

#ifndef TEST_HAS_NO_LOCALIZATION
#  include <iostream>
#  include <type_traits>
#endif

// Builds a string view of the character type CharT from the literal S.
// CharT must be in scope at the point of use.
#define SV(S) MAKE_STRING_VIEW(CharT, S)

// Formats `args` with `fmt` and asserts that the result equals `expected`.
//
// When localization is available and CharT is char, a mismatch is first
// reported on std::cerr (a narrow stream, hence the char-only restriction)
// showing the format string, the expected output and the actual output.
template <class CharT, class... Args>
void check(std::basic_string_view<CharT> expected, test_format_string<CharT, Args...> fmt, Args&&... args) {
  const std::basic_string<CharT> out = std::format(fmt, std::forward<Args>(args)...);
#ifndef TEST_HAS_NO_LOCALIZATION
  if constexpr (std::same_as<CharT, char>) {
    if (out != expected) {
      std::cerr << "\nFormat string " << fmt.get() << "\nExpected output " << expected << "\nActual output " << out
                << '\n';
    }
  }
#endif
  assert(out == expected);
}

// Centers a single code point between '*' fill characters.
// The requested width is 3 for code points expected to take one column and
// 4 for code points expected to take two columns, so every expected output
// has exactly one fill character on each side.
// The "N-byte" section headers group the code points by the section they
// were filed under in the original test.
template <class CharT>
static void test_single_code_point_fill() {
  //*** 1-byte code points ***
  check(SV("* *"), SV("{:*^3}"), SV(" "));
  check(SV("*~*"), SV("{:*^3}"), SV("~"));

  //*** 2-byte code points ***
  check(SV("*\u00a1*"), SV("{:*^3}"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  check(SV("*\u07ff*"), SV("{:*^3}"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  check(SV("*\u0800*"), SV("{:*^3}"), SV("\u0800")); // SAMARITAN LETTER ALAF
  check(SV("*\ufffd*"), SV("{:*^3}"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  check(SV("*\u1100*"), SV("{:*^4}"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  check(SV("*\u115f*"), SV("{:*^4}"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  check(SV("*\u2329*"), SV("{:*^4}"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  check(SV("*\u232a*"), SV("{:*^4}"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  check(SV("*\u2e80*"), SV("{:*^4}"), SV("\u2e80")); // CJK RADICAL REPEAT
  check(SV("*\u303e*"), SV("{:*^4}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  check(SV("*\u3041*"), SV("{:*^4}"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A
  check(SV("*\ua4d0*"), SV("{:*^3}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA (one column)

  check(SV("*\uac00*"), SV("{:*^4}"), SV("\uac00")); // <Hangul Syllable, First>
  check(SV("*\ud7a3*"), SV("{:*^4}"), SV("\ud7a3")); // Hangul Syllable Hih

  check(SV("*\uf900*"), SV("{:*^4}"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  check(SV("*\ufaff*"), SV("{:*^4}"), SV("\ufaff")); // U+FAFF, the code point just before U+FB00 LATIN SMALL LIGATURE FF

  check(SV("*\ufe10*"), SV("{:*^4}"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  check(SV("*\ufe19*"), SV("{:*^4}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  check(SV("*\ufe30*"), SV("{:*^4}"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  check(SV("*\ufe70*"), SV("{:*^3}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM (one column)

  check(SV("*\uff01*"), SV("{:*^4}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  check(SV("*\uff60*"), SV("{:*^4}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  check(SV("*\uffe0*"), SV("{:*^4}"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  check(SV("*\uffe6*"), SV("{:*^4}"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  check(SV("*\U00010000*"), SV("{:*^3}"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  check(SV("*\U0010FFFF*"), SV("{:*^3}"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  check(SV("*\U0001f300*"), SV("{:*^4}"), SV("\U0001f300")); // CYCLONE
  check(SV("*\U0001f64f*"), SV("{:*^4}"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  check(SV("*\U0001f900*"), SV("{:*^4}"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  check(SV("*\U0001f9ff*"), SV("{:*^4}"), SV("\U0001f9ff")); // NAZAR AMULET
  check(SV("*\U00020000*"), SV("{:*^4}"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  check(SV("*\U0002fffd*"), SV("{:*^4}"), SV("\U0002fffd")); // Undefined Character
  check(SV("*\U00030000*"), SV("{:*^4}"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  check(SV("*\U0003fffd*"), SV("{:*^4}"), SV("\U0003fffd")); // Undefined Character
}

// Same inputs as test_single_code_point_fill, but with precision 1 and width 3.
// One column output is unaffected.
// Two column output is removed, thus the result is only the fill character.
template <class CharT>
static void test_single_code_point_truncate() {
  //*** 1-byte code points ***
  check(SV("* *"), SV("{:*^3.1}"), SV(" "));
  check(SV("*~*"), SV("{:*^3.1}"), SV("~"));

  //*** 2-byte code points ***
  check(SV("*\u00a1*"), SV("{:*^3.1}"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  check(SV("*\u07ff*"), SV("{:*^3.1}"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  check(SV("*\u0800*"), SV("{:*^3.1}"), SV("\u0800")); // SAMARITAN LETTER ALAF
  check(SV("*\ufffd*"), SV("{:*^3.1}"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  check(SV("***"), SV("{:*^3.1}"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  check(SV("***"), SV("{:*^3.1}"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  check(SV("***"), SV("{:*^3.1}"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  check(SV("***"), SV("{:*^3.1}"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  check(SV("***"), SV("{:*^3.1}"), SV("\u2e80")); // CJK RADICAL REPEAT
  check(SV("***"), SV("{:*^3.1}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  check(SV("***"), SV("{:*^3.1}"), SV("\u3041"));      // U+3041 HIRAGANA LETTER SMALL A
  check(SV("*\ua4d0*"), SV("{:*^3.1}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA (one column)

  check(SV("***"), SV("{:*^3.1}"), SV("\uac00")); // <Hangul Syllable, First>
  check(SV("***"), SV("{:*^3.1}"), SV("\ud7a3")); // Hangul Syllable Hih

  check(SV("***"), SV("{:*^3.1}"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  check(SV("***"), SV("{:*^3.1}"), SV("\ufaff")); // U+FAFF, the code point just before U+FB00 LATIN SMALL LIGATURE FF

  check(SV("***"), SV("{:*^3.1}"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  check(SV("***"), SV("{:*^3.1}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  check(SV("***"), SV("{:*^3.1}"), SV("\ufe30"));      // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  check(SV("*\ufe70*"), SV("{:*^3.1}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM (one column)

  check(SV("***"), SV("{:*^3.1}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  check(SV("***"), SV("{:*^3.1}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  check(SV("***"), SV("{:*^3.1}"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  check(SV("***"), SV("{:*^3.1}"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  check(SV("*\U00010000*"), SV("{:*^3.1}"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  check(SV("*\U0010FFFF*"), SV("{:*^3.1}"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f300")); // CYCLONE
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f9ff")); // NAZAR AMULET
  check(SV("***"), SV("{:*^3.1}"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  check(SV("***"), SV("{:*^3.1}"), SV("\U0002fffd")); // Undefined Character
  check(SV("***"), SV("{:*^3.1}"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  check(SV("***"), SV("{:*^3.1}"), SV("\U0003fffd")); // Undefined Character
}

// The examples used in the P1868 paper.
template <class CharT>
static void test_P1868() {
  // Every example string is formatted three times: without precision,
  // truncated to 1 column and truncated to 2 columns. The input is always
  // centered between '*' fill characters.

  // Fill
  check(SV("*\u0041*"), SV("{:*^3}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check(SV("*\u00c1*"), SV("{:*^3}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check(SV("*\u0041\u0301*"),
        SV("{:*^3}"),
        SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check(SV("*\u0132*"), SV("{:*^3}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check(SV("*\u0394*"), SV("{:*^3}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check(SV("*\u0429*"), SV("{:*^3}"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check(SV("*\u05d0*"), SV("{:*^3}"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check(SV("*\u0634*"), SV("{:*^3}"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check(SV("*\u3009*"), SV("{:*^4}"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  check(SV("*\u754c*"), SV("{:*^4}"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check(SV("*\U0001f921*"), SV("{:*^4}"), SV("\U0001f921")); // { CLOWN FACE }
  check(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
        SV("{:*^4}"),
        SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 1 column: 1 column grapheme clusters are kept together.
  // 2 column grapheme clusters do not fit and are dropped entirely, leaving
  // only the fill characters.
  check(SV("*\u0041*"), SV("{:*^3.1}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check(SV("*\u00c1*"), SV("{:*^3.1}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check(SV("*\u0041\u0301*"),
        SV("{:*^3.1}"),
        SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check(SV("*\u0132*"), SV("{:*^3.1}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check(SV("*\u0394*"), SV("{:*^3.1}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check(SV("*\u0429*"), SV("{:*^3.1}"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
  check(SV("*\u05d0*"), SV("{:*^3.1}"), SV("\u05d0")); // { HEBREW LETTER ALEF }
  check(SV("*\u0634*"), SV("{:*^3.1}"), SV("\u0634")); // { ARABIC LETTER SHEEN }
  check(SV("***"), SV("{:*^3.1}"), SV("\u3009"));      // { RIGHT ANGLE BRACKET }
  check(SV("***"), SV("{:*^3.1}"), SV("\u754c"));      // { CJK Unified Ideograph-754C }
  check(SV("***"), SV("{:*^3.1}"), SV("\U0001f921"));  // { CLOWN FACE }
  check(SV("***"),
        SV("{:*^3.1}"),
        SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 2 columns: 2 column grapheme clusters are kept together.
  check(SV("*\u0041*"), SV("{:*^3.2}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check(SV("*\u00c1*"), SV("{:*^3.2}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check(SV("*\u0041\u0301*"),
        SV("{:*^3.2}"),
        SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check(SV("*\u0132*"), SV("{:*^3.2}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check(SV("*\u0394*"), SV("{:*^3.2}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check(SV("*\u0429*"), SV("{:*^3.2}"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check(SV("*\u05d0*"), SV("{:*^3.2}"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check(SV("*\u0634*"), SV("{:*^3.2}"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check(SV("*\u3009*"), SV("{:*^4.2}"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  check(SV("*\u754c*"), SV("{:*^4.2}"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check(SV("*\U0001f921*"), SV("{:*^4.2}"), SV("\U0001f921")); // { CLOWN FACE }
  check(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
        SV("{:*^4.2}"),
        SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
}

#ifdef _LIBCPP_VERSION
// Tests the libc++ specific behaviour for malformed UTF-sequences. The
// Standard doesn't specify how to handle this.
template <class CharT>
static void test_malformed_code_point() {
  if constexpr (sizeof(CharT) == 1) {
    // In the width-based checks below the requested width is always the
    // number of code units in the input plus two, and exactly one '*' is
    // expected on each side: every code unit of a malformed sequence is
    // padded as if it took one column.

    // Malformed at end.
    check(SV("*ZZZZ\x8f*"), SV("{:*^7}"), SV("ZZZZ\x8f"));
    check(SV("*ZZZZ\xcf*"), SV("{:*^7}"), SV("ZZZZ\xcf"));
    check(SV("*ZZZZ\xef*"), SV("{:*^7}"), SV("ZZZZ\xef"));
    check(SV("*ZZZZ\xff*"), SV("{:*^7}"), SV("ZZZZ\xff"));

    // Malformed in middle, no continuation
    check(SV("*ZZZZ\x8fZ*"), SV("{:*^8}"), SV("ZZZZ\x8fZ"));
    check(SV("*ZZZZ\xcfZ*"), SV("{:*^8}"), SV("ZZZZ\xcfZ"));
    check(SV("*ZZZZ\xefZ*"), SV("{:*^8}"), SV("ZZZZ\xefZ"));
    check(SV("*ZZZZ\xffZ*"), SV("{:*^8}"), SV("ZZZZ\xffZ"));

    check(SV("*ZZZZ\x8fZZ*"), SV("{:*^9}"), SV("ZZZZ\x8fZZ"));
    check(SV("*ZZZZ\xcfZZ*"), SV("{:*^9}"), SV("ZZZZ\xcfZZ"));
    check(SV("*ZZZZ\xefZZ*"), SV("{:*^9}"), SV("ZZZZ\xefZZ"));
    check(SV("*ZZZZ\xffZZ*"), SV("{:*^9}"), SV("ZZZZ\xffZZ"));

    check(SV("*ZZZZ\x8fZZZ*"), SV("{:*^10}"), SV("ZZZZ\x8fZZZ"));
    check(SV("*ZZZZ\xcfZZZ*"), SV("{:*^10}"), SV("ZZZZ\xcfZZZ"));
    check(SV("*ZZZZ\xefZZZ*"), SV("{:*^10}"), SV("ZZZZ\xefZZZ"));
    check(SV("*ZZZZ\xffZZZ*"), SV("{:*^10}"), SV("ZZZZ\xffZZZ"));

    check(SV("*ZZZZ\x8fZZZZ*"), SV("{:*^11}"), SV("ZZZZ\x8fZZZZ"));
    check(SV("*ZZZZ\xcfZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xcfZZZZ"));
    check(SV("*ZZZZ\xefZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xefZZZZ"));
    check(SV("*ZZZZ\xffZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xffZZZZ"));

    // Invalid continuations
    // No width is requested here; the input is expected to be copied to the
    // output unchanged.
    check(SV("\xc2\x00"), SV("{}"), SV("\xc2\x00"));
    check(SV("\xc2\x40"), SV("{}"), SV("\xc2\x40"));
    check(SV("\xc2\xc0"), SV("{}"), SV("\xc2\xc0"));

    check(SV("\xe0\x00\x80"), SV("{}"), SV("\xe0\x00\x80"));
    check(SV("\xe0\x40\x80"), SV("{}"), SV("\xe0\x40\x80"));
    check(SV("\xe0\xc0\x80"), SV("{}"), SV("\xe0\xc0\x80"));

    check(SV("\xe0\x80\x00"), SV("{}"), SV("\xe0\x80\x00"));
    check(SV("\xe0\x80\x40"), SV("{}"), SV("\xe0\x80\x40"));
    check(SV("\xe0\x80\xc0"), SV("{}"), SV("\xe0\x80\xc0"));

    check(SV("\xf0\x80\x80\x00"), SV("{}"), SV("\xf0\x80\x80\x00"));
    check(SV("\xf0\x80\x80\x40"), SV("{}"), SV("\xf0\x80\x80\x40"));
    check(SV("\xf0\x80\x80\xc0"), SV("{}"), SV("\xf0\x80\x80\xc0"));

    check(SV("\xf0\x80\x00\x80"), SV("{}"), SV("\xf0\x80\x00\x80"));
    check(SV("\xf0\x80\x40\x80"), SV("{}"), SV("\xf0\x80\x40\x80"));
    check(SV("\xf0\x80\xc0\x80"), SV("{}"), SV("\xf0\x80\xc0\x80"));

    check(SV("\xf0\x00\x80\x80"), SV("{}"), SV("\xf0\x00\x80\x80"));
    check(SV("\xf0\x40\x80\x80"), SV("{}"), SV("\xf0\x40\x80\x80"));
    check(SV("\xf0\xc0\x80\x80"), SV("{}"), SV("\xf0\xc0\x80\x80"));

    // Premature end.
    check(SV("*ZZZZ\xef\xf5*"), SV("{:*^8}"), SV("ZZZZ\xef\xf5"));
    check(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("{:*^12}"), SV("ZZZZ\xef\xf5ZZZZ"));
    check(SV("*ZZZZ\xff\xf5\xf5*"), SV("{:*^9}"), SV("ZZZZ\xff\xf5\xf5"));
    check(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("{:*^13}"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));

  } else if constexpr (sizeof(CharT) == 2) {
    // TODO FMT Add these tests.
  }
  // UTF-32 doesn't combine characters, thus no corruption tests.
}
#endif

// Runs every test group for one character type. The malformed input tests
// only exist when testing libc++.
template <class CharT>
static void test() {
  test_single_code_point_fill<CharT>();
  test_single_code_point_truncate<CharT>();
  test_P1868<CharT>();

#ifdef _LIBCPP_VERSION
  test_malformed_code_point<CharT>();
#endif
}

// Runs the tests for char and, when wide characters are available, wchar_t.
int main(int, char**) {
  test<char>();

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  test<wchar_t>();
#endif

  return 0;
}