1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // UNSUPPORTED: c++03, c++11, c++14, c++17 10 // UNSUPPORTED: libcpp-has-no-incomplete-ranges 11 12 // template <class View, class Pattern> 13 // class std::ranges::lazy_split_view; 14 // 15 // These test check the output `lazy_split_view` produces for a variety of inputs, including many corner cases, with no 16 // restrictions on which member functions can be called. 17 18 #include <ranges> 19 20 #include <algorithm> 21 #include <array> 22 #include <cassert> 23 #include <map> 24 #include <string> 25 #include <string_view> 26 #include <utility> 27 #include <vector> 28 #include "small_string.h" 29 #include "types.h" 30 31 template <std::ranges::view View, std::ranges::range Expected> 32 constexpr bool is_equal(View& view, const Expected& expected) { 33 using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>; 34 using Str = BasicSmallString<Char>; 35 36 auto actual_it = view.begin(); 37 auto expected_it = expected.begin(); 38 for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) { 39 if (Str(*actual_it) != Str(*expected_it)) 40 return false; 41 } 42 43 return actual_it == view.end() && expected_it == expected.end(); 44 } 45 46 template <class T, class Separator, class U, size_t M> 47 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) { 48 std::ranges::lazy_split_view v(input, separator); 49 return is_equal(v, expected); 50 } 51 52 template <class T, class Separator, class U, size_t M> 53 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) { 54 auto expected_it = expected.begin(); 55 for (auto e : input | std::ranges::views::lazy_split(separator)) { 56 if (expected_it == expected.end()) 57 return false; 58 if (SmallString(e) != *expected_it) 59 return false; 60 61 ++expected_it; 62 } 63 64 return expected_it == expected.end(); 65 } 66 67 constexpr bool test_l_r_values() { 68 using namespace std::string_view_literals; 69 70 // Both lvalues and rvalues can be used as input. 71 { 72 // Lvalues. 73 { 74 auto input = "abc"sv; 75 auto sep = " "sv; 76 [[maybe_unused]] std::ranges::lazy_split_view v(input, sep); 77 } 78 79 // Const lvalues. 80 { 81 const auto input = "abc"sv; 82 const auto sep = " "sv; 83 [[maybe_unused]] std::ranges::lazy_split_view v(input, sep); 84 } 85 86 // Rvalues. 87 { 88 auto input = "abc"sv; 89 auto sep = " "sv; 90 [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep)); 91 } 92 93 // Const rvalues. 94 { 95 const auto input = "abc"sv; 96 const auto sep = " "sv; 97 [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep)); 98 } 99 } 100 101 return true; 102 } 103 104 constexpr bool test_string_literal_separator() { 105 using namespace std::string_view_literals; 106 107 // Splitting works as expected when the separator is a single character literal. 108 { 109 std::ranges::lazy_split_view v("abc def"sv, ' '); 110 assert(is_equal(v, std::array{"abc"sv, "def"sv})); 111 } 112 113 // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is 114 // because of the implicit terminating null in the literal. 115 { 116 std::ranges::lazy_split_view v("abc def"sv, " "); 117 assert(is_equal(v, std::array{"abc def"sv})); 118 } 119 120 // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`. 121 // Should the input string contain that two-character sequence, the separator would match. 122 { 123 std::ranges::lazy_split_view v("abc \0def"sv, " "); 124 assert(is_equal(v, std::array{"abc"sv, "def"sv})); 125 } 126 127 return true; 128 } 129 130 // Make sure that a string literal and a `string_view` produce the same results (which isn't always the case, see 131 // below). 132 template <class T> 133 constexpr std::string_view sv(T&& str) { 134 return std::string_view(str); 135 }; 136 137 template <class T, class Separator, class U, size_t M> 138 constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) { 139 assert(test_function_call(input, separator, expected)); 140 assert(test_with_piping(input, separator, expected)); 141 142 // In addition to the `(ForwardView, ForwardView)` case, test the `(ForwardView, tiny-range)` and `(InputView, 143 // tiny-range)` cases (all of which have unique code paths). 144 if constexpr (std::is_same_v<std::remove_reference_t<Separator>, char>) { 145 assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected)); 146 assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected)); 147 148 assert(test_function_call(InputView(input), ForwardTinyView(separator), expected)); 149 assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected)); 150 } 151 } 152 153 constexpr bool test_string_literals() { 154 // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected 155 // results due to the implicit terminating null that is treated as part of the range. 156 157 using namespace std::string_view_literals; 158 159 char short_sep = ' '; 160 auto long_sep = "12"sv; 161 162 // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from 163 // the original range). 164 { 165 std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))}; 166 167 assert(test_function_call("abc def", short_sep, expected)); 168 assert(test_with_piping("abc def", short_sep, expected)); 169 assert(test_function_call("abc12def", long_sep, expected)); 170 assert(test_with_piping("abc12def", long_sep, expected)); 171 } 172 173 // Empty string. 174 { 175 // Because an empty string literal contains an implicit terminating null, the output will contain one segment. 176 std::array expected = {std::string_view("", 1)}; 177 178 assert(test_function_call("", short_sep, expected)); 179 assert(test_with_piping("", short_sep, expected)); 180 assert(test_function_call("", long_sep, expected)); 181 assert(test_with_piping("", long_sep, expected)); 182 } 183 184 // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "` 185 // are treated differently due to the presence of an implicit `\0` in the latter. 186 { 187 const char input[] = "abc def"; 188 std::array expected_unsplit = {std::string_view(input, sizeof(input))}; 189 std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))}; 190 191 assert(test_function_call(input, " ", expected_unsplit)); 192 assert(test_function_call("abc \0def", " ", expected_split)); 193 // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`. 194 } 195 196 // Empty separator. 197 { 198 auto empty_sep = ""sv; 199 std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv}; 200 201 assert(test_function_call("abc", empty_sep, expected)); 202 assert(test_with_piping("abc", empty_sep, expected)); 203 } 204 205 return true; 206 } 207 208 bool test_nontrivial_characters() { 209 // Try a deliberately heavyweight "character" type to see if it triggers any corner cases. 210 211 using Map = std::map<std::string, int>; 212 using Vec = std::vector<Map>; 213 214 Map sep = {{"yyy", 999}}; 215 Map m1 = { 216 {"a", 1}, 217 {"bc", 2}, 218 }; 219 Map m2 = { 220 {"def", 3}, 221 }; 222 Map m3 = { 223 {"g", 4}, 224 {"hijk", 5}, 225 }; 226 227 Vec expected1 = {m1, m2}; 228 Vec expected2 = {m3}; 229 230 std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep); 231 232 // Segment 1: {m1, m2} 233 auto outer = v.begin(); 234 assert(outer != v.end()); 235 auto inner = (*outer).begin(); 236 assert(*inner++ == m1); 237 assert(*inner++ == m2); 238 assert(inner == (*outer).end()); 239 240 // Segment 2: {m3} 241 ++outer; 242 assert(outer != v.end()); 243 inner = (*outer).begin(); 244 assert(*inner++ == m3); 245 assert(inner == (*outer).end()); 246 247 ++outer; 248 assert(outer == v.end()); 249 250 return true; 251 } 252 253 constexpr bool main_test() { 254 using namespace std::string_view_literals; 255 256 char short_sep = ' '; 257 auto long_sep = "12"sv; 258 259 // One separator. 260 { 261 std::array expected = {"abc"sv, "def"sv}; 262 test_one("abc def"sv, short_sep, expected); 263 test_one("abc12def"sv, long_sep, expected); 264 } 265 266 // Several separators in a row. 267 { 268 std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv}; 269 test_one("abc def"sv, short_sep, expected); 270 test_one("abc12121212def"sv, long_sep, expected); 271 } 272 273 // Trailing separator. 274 { 275 std::array expected = {"abc"sv, "def"sv, ""sv}; 276 test_one("abc def "sv, short_sep, expected); 277 test_one("abc12def12"sv, long_sep, expected); 278 } 279 280 // Leading separator. 281 { 282 std::array expected = {""sv, "abc"sv, "def"sv}; 283 test_one(" abc def"sv, short_sep, expected); 284 test_one("12abc12def"sv, long_sep, expected); 285 } 286 287 // No separator. 288 { 289 std::array expected = {"abc"sv}; 290 test_one("abc"sv, short_sep, expected); 291 test_one("abc"sv, long_sep, expected); 292 } 293 294 // Input consisting of a single separator. 295 { 296 std::array expected = {""sv, ""sv}; 297 test_one(" "sv, short_sep, expected); 298 test_one("12"sv, long_sep, expected); 299 } 300 301 // Input consisting of only separators. 302 { 303 std::array expected = {""sv, ""sv, ""sv, ""sv}; 304 test_one(" "sv, short_sep, expected); 305 test_one("121212"sv, long_sep, expected); 306 } 307 308 // The separator and the string use the same character only. 309 { 310 auto overlapping_sep = "aaa"sv; 311 std::array expected = {""sv, "aa"sv}; 312 test_one("aaaaa"sv, overlapping_sep, expected); 313 } 314 315 // Many redundant separators. 316 { 317 std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv}; 318 test_one(" abc def "sv, short_sep, expected); 319 test_one("1212abc121212def1212"sv, long_sep, expected); 320 } 321 322 // Separators after every character. 323 { 324 std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv}; 325 test_one(" a b c "sv, short_sep, expected); 326 test_one("12a12b12c12"sv, long_sep, expected); 327 } 328 329 // Overlap between the separator and the string (see https://wg21.link/lwg3505). 330 { 331 auto overlapping_sep = "ab"sv; 332 std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv}; 333 test_one("aabaaababb"sv, overlapping_sep, expected); 334 } 335 336 // Empty input. 337 { 338 std::array<std::string_view, 0> expected = {}; 339 test_one(""sv, short_sep, expected); 340 test_one(""sv, long_sep, expected); 341 } 342 343 // Empty separator. 344 { 345 auto empty_sep = ""sv; 346 std::array expected = {"a"sv, "b"sv, "c"sv}; 347 test_one("abc"sv, empty_sep, expected); 348 test_one("abc"sv, empty_sep, expected); 349 } 350 351 // Terminating null as a separator. 352 { 353 std::array expected = {"abc"sv, "def"sv}; 354 test_one("abc\0def"sv, '\0', expected); 355 test_one("abc\0\0def"sv, "\0\0"sv, expected); 356 } 357 358 // Different character types. 359 { 360 // `char`. 361 test_function_call("abc def", ' ', std::array{"abc", "def"}); 362 #ifndef TEST_HAS_NO_WIDE_CHARACTERS 363 // `wchar_t`. 364 test_function_call(L"abc def", L' ', std::array{L"abc", L"def"}); 365 #endif 366 // `char8_t`. 367 test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"}); 368 // `char16_t`. 369 test_function_call(u"abc def", u' ', std::array{u"abc", u"def"}); 370 // `char32_t`. 371 test_function_call(U"abc def", U' ', std::array{U"abc", U"def"}); 372 } 373 374 // Non-character input. 375 { 376 std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}}; 377 test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected); 378 test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected); 379 } 380 381 return true; 382 } 383 384 int main(int, char**) { 385 main_test(); 386 static_assert(main_test()); 387 388 test_string_literals(); 389 static_assert(test_string_literals()); 390 391 test_l_r_values(); 392 static_assert(test_l_r_values()); 393 394 test_string_literal_separator(); 395 static_assert(test_string_literal_separator()); 396 397 // Note: map is not `constexpr`, so this test is runtime-only. 398 test_nontrivial_characters(); 399 400 return 0; 401 } 402