//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME

// <format>

// Tests the properties of the Unicode escaped output table.
// The libc++ algorithm has size and speed optimizations based on the properties
// of Unicode. This means updating the Unicode tables has a likelihood of
// breaking this test. The test acts as an assertion; a failure requires
// validating whether the assumptions of the size and speed optimizations are
// still valid.

#include <algorithm>
#include <cassert>
#include <format>
#include <functional>
#include <numeric>

// Contains the entries for [format.string.escaped]/2.2.1.2.1
//   CE is a Unicode encoding and C corresponds to a UCS scalar value whose
//   Unicode property General_Category has a value in the groups Separator (Z)
//   or Other (C), as described by table 12 of UAX #44
//
// Separator (Z) consists of General_Category
// - Zs Space_Separator,
// - Zl Line_Separator,
// - Zp Paragraph_Separator.
//
// Other (C) consists of General_Category
// - Cc Control,
// - Cf Format,
// - Cs Surrogate,
// - Co Private_Use,
// - Cn Unassigned.
// Number of code points per General_Category in the Separator (Z) group.
inline constexpr int Zs{17}; // Space_Separator
inline constexpr int Zl{1};  // Line_Separator
inline constexpr int Zp{1};  // Paragraph_Separator
inline constexpr int Z{Zs + Zl + Zp};

// Number of code points per General_Category in the Other (C) group.
inline constexpr int Cc{65};     // Control
inline constexpr int Cf{170};    // Format
inline constexpr int Cs{2048};   // Surrogate
inline constexpr int Co{137468}; // Private_Use
inline constexpr int Cn{824718}; // Unassigned
inline constexpr int C{Cc + Cf + Cs + Co + Cn};

// This is the final part of the Unicode properties table:
//
// 31350..323AF  ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
// 323B0..E0000  ; Cn # [711761] <reserved-323B0>..<reserved-E0000>
// E0001         ; Cf #       LANGUAGE TAG
// E0002..E001F  ; Cn # [30] <reserved-E0002>..<reserved-E001F>
// E0020..E007F  ; Cf # [96] TAG SPACE..CANCEL TAG
// E0080..E00FF  ; Cn # [128] <reserved-E0080>..<reserved-E00FF>
// E0100..E01EF  ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
// E01F0..EFFFF  ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
// F0000..FFFFD  ; Co # [65534] <private-use-F0000>..<private-use-FFFFD>
// FFFFE..FFFFF  ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
// 100000..10FFFD; Co # [65534] <private-use-100000>..<private-use-10FFFD>
// 10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
//
// It can be observed that all entries in the range 323B0..10FFFF are in the
// categories Cf, Co, Cn, except a small range with the property Mn.
// In order to reduce the size of the table only the entries in the range
// [0000, 323B0) are stored in the table. The entries in the range
// [323B0, 10FFFF] use a hand-crafted algorithm.
//
// This means a number of entries are omitted from the table: every code point
// in [323B0, 10FFFF] except the 240 code points with the property Mn.
inline constexpr int excluded = (0x10FFFF - 0x323B0 + 1) - 240;

// Number of code points the table itself is expected to describe.
inline constexpr int entries = (Z + C) - excluded;

// Sums, over every element of the escaped output table, the number of code
// points that element accounts for: one plus the value of its low 14 bits.
static constexpr int count_entries() {
  int total = 0;
  for (auto packed : std::__escaped_output_table::__entries)
    total += 1 + static_cast<int>(packed & 0x3fffu);
  return total;
}
static_assert(count_entries() == entries);

// Verifies __needs_escape for the code points at and above 31350, range by
// range, against the expected escape status of each range.
int main(int, char**) {
  namespace table = std::__escaped_output_table;

  // 31350..323AF  ; Lo # [4192]
  for (char32_t cp = 0x31350; cp <= 0x323AF; ++cp)
    assert(table::__needs_escape(cp) == false);

  // 323B0..E00FF  ; C
  for (char32_t cp = 0x323B0; cp <= 0xE00FF; ++cp)
    assert(table::__needs_escape(cp) == true);

  // E0100..E01EF  ; Mn # [240]
  for (char32_t cp = 0xE0100; cp <= 0xE01EF; ++cp)
    assert(table::__needs_escape(cp) == false);

  // E01F0..10FFFF; C
  for (char32_t cp = 0xE01F0; cp <= 0x10FFFF; ++cp)
    assert(table::__needs_escape(cp) == true);

  return 0;
}