1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // UNSUPPORTED: c++03, c++11, c++14, c++17 10 // UNSUPPORTED: libcpp-has-no-unicode 11 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME 12 13 // <format> 14 15 // Tests the implementation of the extended grapheme cluster boundaries per 16 // https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules 17 // 18 // The tests are based on the test data provided by Unicode 19 // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt 20 21 #include <cassert> 22 #include <format> 23 #include <functional> 24 #include <numeric> 25 26 #include "extended_grapheme_cluster.h" 27 28 // Validates whether the number of code points in our "database" matches with 29 // the number in the Unicode. The assumption is when the number of items per 30 // property matches the code points themselves also match. 
namespace {

namespace cluster = std::__extended_grapheme_custer_property_boundary;

// Sums the code points in cluster::__entries that are tagged with `property`.
//
// The low four bits of an entry are compared against `property`. A matching
// entry contributes one plus the seven-bit value stored in bits [4, 10].
constexpr int count_entries(cluster::__property property) {
  int total = 0;
  for (const auto entry : cluster::__entries) {
    if (static_cast<cluster::__property>(entry & 0xf) == property)
      total += 1 + static_cast<int>((entry >> 4) & 0x7f);
  }
  return total;
}

// Expected code point totals for each extended grapheme cluster property.
static_assert(count_entries(cluster::__property::__Prepend) == 27);
static_assert(count_entries(cluster::__property::__CR) == 1);
static_assert(count_entries(cluster::__property::__LF) == 1);
static_assert(count_entries(cluster::__property::__Control) == 3893);
static_assert(count_entries(cluster::__property::__Extend) == 2130);
static_assert(count_entries(cluster::__property::__Regional_Indicator) == 26);
static_assert(count_entries(cluster::__property::__SpacingMark) == 395);
static_assert(count_entries(cluster::__property::__L) == 125);
static_assert(count_entries(cluster::__property::__V) == 95);
static_assert(count_entries(cluster::__property::__T) == 137);
static_assert(count_entries(cluster::__property::__LV) == 399);
static_assert(count_entries(cluster::__property::__LVT) == 10773);
static_assert(count_entries(cluster::__property::__ZWJ) == 1);
static_assert(count_entries(cluster::__property::__Extended_Pictographic) == 3537);

namespace inCB = std::__indic_conjunct_break;

// Sums the code points in inCB::__entries that are tagged with `property`.
//
// The low two bits of an entry are compared against `property`. A matching
// entry contributes one plus the nine-bit value stored in bits [2, 10].
constexpr int count_entries(inCB::__property property) {
  int total = 0;
  for (const auto entry : inCB::__entries) {
    if (static_cast<inCB::__property>(entry & 0b11) == property)
      total += 1 + static_cast<int>((entry >> 2) & 0b1'1111'1111);
  }
  return total;
}

// Expected code point totals for each Indic conjunct break property.
static_assert(count_entries(inCB::__property::__Linker) == 6);
static_assert(count_entries(inCB::__property::__Consonant) == 240);
static_assert(count_entries(inCB::__property::__Extend) == 884);

} // namespace

// Runs every test case in `data` through the extended grapheme cluster view.
//
// Each case provides an `input`, the expected `code_points`, and the expected
// `breaks` (offsets from the start of `input`). Every __consume() call must
// yield the next expected code point and stop at the next expected offset.
template <class Data>
constexpr void test(const Data& data) {
  for (const auto& test_case : data) {
    // Both expectation lists are indexed in lockstep below.
    assert(test_case.code_points.size() == test_case.breaks.size());

    const auto first = test_case.input.begin();
    std::__unicode::__extended_grapheme_cluster_view view{first, test_case.input.end()};

    const std::size_t cluster_count = test_case.breaks.size();
    for (std::size_t index = 0; index != cluster_count; ++index) {
      auto cluster_result = view.__consume();
      assert(cluster_result.__code_point_ == test_case.code_points[index]);
      assert(cluster_result.__last_ == first + test_case.breaks[index]);
    }
  }
}

// Exercises the UTF-8 data set and, when wide characters are available, the
// wide data set that matches the size of wchar_t.
constexpr bool test() {
  test(data_utf8);

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  if constexpr (sizeof(wchar_t) != 2)
    test(data_utf32);
  else
    test(data_utf16);
#endif

  return true;
}

int main(int, char**) {
  test();
  // static_assert(test());

  return 0;
}