1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
10 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
11 
12 // <format>
13 
// Tests the properties of the Unicode escaped output table.
// The libc++ algorithm has size and speed optimizations based on the properties
// of Unicode. This means updating the Unicode tables is likely to break this
// test. Such a failure acts as an assertion: it requires validating whether
// the assumptions of the size and speed optimizations are still valid.
19 
20 #include <algorithm>
21 #include <cassert>
22 #include <format>
23 #include <functional>
24 #include <numeric>
25 
26 // Contains the entries for [format.string.escaped]/2.2.1.2.1
27 //   CE is a Unicode encoding and C corresponds to a UCS scalar value whose
28 //   Unicode property General_Category has a value in the groups Separator (Z)
29 //   or Other (C), as described by table 12 of UAX #44
30 //
31 // Separator (Z) consists of General_Category
32 // - Zs Space_Separator,
33 // - Zl Line_Separator,
34 // - Zp Paragraph_Separator.
35 //
36 // Other (C) consists of General_Category
37 // - Cc Control,
38 // - Cf Format,
39 // - Cs Surrogate,
40 // - Co Private_Use,
41 // - Cn Unassigned.
// Number of code points per General_Category in the group Separator (Z).
inline constexpr int Zs{17}; // Space_Separator
inline constexpr int Zl{1};  // Line_Separator
inline constexpr int Zp{1};  // Paragraph_Separator
// Total number of code points in the group Separator (Z).
inline constexpr int Z{Zs + Zl + Zp};
46 
// Number of code points per General_Category in the group Other (C).
inline constexpr int Cc{65};      // Control
inline constexpr int Cf{170};     // Format
inline constexpr int Cs{2'048};   // Surrogate
inline constexpr int Co{137'468}; // Private_Use
inline constexpr int Cn{824'718}; // Unassigned
// Total number of code points in the group Other (C).
inline constexpr int C{Cc + Cf + Cs + Co + Cn};
53 
54 // This is the final part of the Unicode properties table:
55 //
56 // 31350..323AF  ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
57 // 323B0..E0000  ; Cn # [711761] <reserved-323B0>..<reserved-E0000>
58 // E0001         ; Cf #       LANGUAGE TAG
59 // E0002..E001F  ; Cn #  [30] <reserved-E0002>..<reserved-E001F>
60 // E0020..E007F  ; Cf #  [96] TAG SPACE..CANCEL TAG
61 // E0080..E00FF  ; Cn # [128] <reserved-E0080>..<reserved-E00FF>
62 // E0100..E01EF  ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
63 // E01F0..EFFFF  ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
64 // F0000..FFFFD  ; Co # [65534] <private-use-F0000>..<private-use-FFFFD>
65 // FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
66 // 100000..10FFFD; Co # [65534] <private-use-100000>..<private-use-10FFFD>
67 // 10FFFE..10FFFF; Cn #   [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
68 //
// It can be observed that all entries in the range 323B0..10FFFF are in the
// categories Cf, Co, Cn, except a small range with the property Mn.
// In order to reduce the size of the table only the entries in the range
// [0000, 323B0) are stored in the table. The entries in the range
// [323B0, 10FFFF] use a hand-crafted algorithm.
74 //
// Number of code points that are omitted from the table as a result: every
// code point in [323B0, 10FFFF] except the ones in E0100..E01EF, which have the
// property Mn.
inline constexpr int excluded =
    (0x10FFFF + 1 - 0x323B0)   // All code points in [323B0, 10FFFF],
    - (0xE01EF + 1 - 0xE0100); // minus the 240 code points E0100..E01EF.
77 
78 inline constexpr int entries = Z + C - excluded;
79 
// Returns the number of code points covered by the escaped output table.
//
// Every element of the table contributes one plus the value of its 14 least
// significant bits.
// NOTE(review): the low 14 bits presumably store the size of the range minus
// one; confirm against the script that generates the table.
static constexpr int count_entries() {
  return std::accumulate(
      std::begin(std::__escaped_output_table::__entries),
      std::end(std::__escaped_output_table::__entries),
      0,
      [](int total, auto entry) { return total + (1 + static_cast<int>(entry & 0x3fffu)); });
}
// The stored table must cover exactly the expected number of code points.
static_assert(count_entries() == entries);
89 
// Validates the escaping decision for every code point from 31350 up to and
// including 10FFFF. Each range has its own loop so a failing assert identifies
// the range that broke.
int main(int, char**) {
  // 31350..323AF  ; Lo # [4192]
  // Not in the groups Z or C, thus not escaped.
  for (char32_t cp = 0x31350; cp < 0x323B0; ++cp) {
    assert(std::__escaped_output_table::__needs_escape(cp) == false);
  }

  // 323B0..E00FF ; C
  // Everything here is in the group C, thus escaped.
  for (char32_t cp = 0x323B0; cp < 0xE0100; ++cp) {
    assert(std::__escaped_output_table::__needs_escape(cp) == true);
  }

  // E0100..E01EF  ; Mn # [240]
  // The only range past 323B0 that is not escaped.
  for (char32_t cp = 0xE0100; cp < 0xE01F0; ++cp) {
    assert(std::__escaped_output_table::__needs_escape(cp) == false);
  }

  // E01F0..10FFFF; C
  // The remainder up to the last Unicode code point is escaped again.
  for (char32_t cp = 0xE01F0; cp < 0x110000; ++cp) {
    assert(std::__escaped_output_table::__needs_escape(cp) == true);
  }

  return 0;
}
105