1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 // UNSUPPORTED: libcpp-has-no-unicode
11 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
12 
13 // <format>
14 
15 // Tests the implementation of the extended grapheme cluster boundaries per
16 // https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
17 //
18 // The tests are based on the test data provided by Unicode
19 // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
20 
21 #include <cassert>
22 #include <format>
23 #include <functional>
24 #include <numeric>
25 
26 #include "extended_grapheme_cluster.h"
27 
28 // Validates whether the number of code points in our "database" matches the
29 // number in the Unicode data. The assumption is that when the number of items
30 // per property matches, the code points themselves also match.
31 namespace {
32 namespace cluster = std::__extended_grapheme_custer_property_boundary;
33 constexpr int count_entries(cluster::__property property) {
34   return std::transform_reduce(
35       std::begin(cluster::__entries), std::end(cluster::__entries), 0, std::plus{}, [property](auto entry) {
36         if (static_cast<cluster::__property>(entry & 0xf) != property)
37           return 0;
38 
39         return 1 + static_cast<int>((entry >> 4) & 0x7f);
40       });
41 }
42 
// Expected number of code points for every grapheme cluster break property,
// checked at compile time against the totals computed by count_entries().
// NOTE(review): these literals presumably come from the Unicode data files
// and need to be refreshed together with the table — confirm when updating.
43 static_assert(count_entries(cluster::__property::__Prepend) == 27);
44 static_assert(count_entries(cluster::__property::__CR) == 1);
45 static_assert(count_entries(cluster::__property::__LF) == 1);
46 static_assert(count_entries(cluster::__property::__Control) == 3893);
47 static_assert(count_entries(cluster::__property::__Extend) == 2130);
48 static_assert(count_entries(cluster::__property::__Regional_Indicator) == 26);
49 static_assert(count_entries(cluster::__property::__SpacingMark) == 395);
50 static_assert(count_entries(cluster::__property::__L) == 125);
51 static_assert(count_entries(cluster::__property::__V) == 95);
52 static_assert(count_entries(cluster::__property::__T) == 137);
53 static_assert(count_entries(cluster::__property::__LV) == 399);
54 static_assert(count_entries(cluster::__property::__LVT) == 10773);
55 static_assert(count_entries(cluster::__property::__ZWJ) == 1);
56 static_assert(count_entries(cluster::__property::__Extended_Pictographic) == 3537);
57 
58 namespace inCB = std::__indic_conjunct_break;
59 constexpr int count_entries(inCB::__property property) {
60   return std::transform_reduce(
61       std::begin(inCB::__entries), std::end(inCB::__entries), 0, std::plus{}, [property](auto entry) {
62         if (static_cast<inCB::__property>(entry & 0b11) != property)
63           return 0;
64 
65         return 1 + static_cast<int>((entry >> 2) & 0b1'1111'1111);
66       });
67 }
68 
// Expected number of code points for every Indic conjunct break property,
// checked at compile time against the totals computed by count_entries().
// NOTE(review): these literals presumably come from the Unicode data files
// and need to be refreshed together with the table — confirm when updating.
69 static_assert(count_entries(inCB::__property::__Linker) == 6);
70 static_assert(count_entries(inCB::__property::__Consonant) == 240);
71 static_assert(count_entries(inCB::__property::__Extend) == 884);
72 
73 } // namespace
74 
75 template <class Data>
76 constexpr void test(const Data& data) {
77   for (const auto& d : data) {
78     assert(d.code_points.size() == d.breaks.size());
79 
80     std::__unicode::__extended_grapheme_cluster_view view{d.input.begin(), d.input.end()};
81     for (std::size_t i = 0; i < d.breaks.size(); ++i) {
82       auto r = view.__consume();
83       assert(r.__code_point_ == d.code_points[i]);
84       assert(r.__last_ == d.input.begin() + d.breaks[i]);
85     }
86   }
87 }
88 
89 constexpr bool test() {
90   test(data_utf8);
91 
92 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
93   if constexpr (sizeof(wchar_t) == 2)
94     test(data_utf16);
95   else
96     test(data_utf32);
97 #endif
98 
99   return true;
100 }
101 
// Test entry point: executes test() at run time only. The compile-time
// evaluation of the same checks is left commented out below.
// NOTE(review): the reason the static_assert is disabled is not recorded
// here — confirm before re-enabling it.
102 int main(int, char**) {
103   test();
104   // static_assert(test());
105 
106   return 0;
107 }
108