xref: /llvm-project/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp (revision fb855eb941b6d740cc6560297d0b4d3201dcaf9f)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 
11 // template <class View, class Pattern>
12 // class std::ranges::lazy_split_view;
13 //
// These tests check the output `lazy_split_view` produces for a variety of inputs, including many corner cases,
// with no restrictions on which member functions can be called.
16 
17 #include <ranges>
18 
19 #include <algorithm>
20 #include <array>
21 #include <cassert>
22 #include <concepts>
23 #include <map>
24 #include <string_view>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 #include "types.h"
29 
30 // Basic utility to convert a range to a string-like type. This handles ranges
31 // that do not contain character types and can work with non-contiguous inputs.
32 template <class Char>
33 class BasicSmallString {
34   std::vector<Char> buffer_{};
35 
36 public:
BasicSmallString(std::basic_string_view<Char> v)37   constexpr BasicSmallString(std::basic_string_view<Char> v)
38     requires (std::same_as<Char, char> ||
39 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
40               std::same_as<Char, wchar_t> ||
41 #endif
42               std::same_as<Char, char8_t> ||
43               std::same_as<Char, char16_t> ||
44               std::same_as<Char, char32_t>)
45     : buffer_(v.begin(), v.end())
46   {}
47 
48   template <class I, class S>
BasicSmallString(I b,const S & e)49   constexpr BasicSmallString(I b, const S& e) {
50     for (; b != e; ++b) {
51       buffer_.push_back(*b);
52     }
53   }
54 
55   template <std::ranges::range R>
BasicSmallString(R && from)56   constexpr BasicSmallString(R&& from) : BasicSmallString(from.begin(), from.end()) {}
57 
operator ==(const BasicSmallString & lhs,const BasicSmallString & rhs)58   friend constexpr bool operator==(const BasicSmallString& lhs, const BasicSmallString& rhs) {
59     return lhs.buffer_ == rhs.buffer_;
60   }
61 };
62 
63 template <std::ranges::view View, std::ranges::range Expected>
is_equal(View & view,const Expected & expected)64 constexpr bool is_equal(View& view, const Expected& expected) {
65   using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>;
66   using Str = BasicSmallString<Char>;
67 
68   auto actual_it = view.begin();
69   auto expected_it = expected.begin();
70   for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) {
71     if (Str(*actual_it) != Str(*expected_it))
72       return false;
73   }
74 
75   return actual_it == view.end() && expected_it == expected.end();
76 }
77 
78 template <class T, class Separator, class U, std::size_t M>
test_function_call(T && input,Separator && separator,std::array<U,M> expected)79 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) {
80   std::ranges::lazy_split_view v(input, separator);
81   return is_equal(v, expected);
82 }
83 
84 template <class T, class Separator, class U, std::size_t M>
test_with_piping(T && input,Separator && separator,std::array<U,M> expected)85 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) {
86   auto expected_it = expected.begin();
87   for (auto e : input | std::ranges::views::lazy_split(separator)) {
88     if (expected_it == expected.end())
89       return false;
90     if (!std::ranges::equal(e, *expected_it))
91       return false;
92 
93     ++expected_it;
94   }
95 
96   return expected_it == expected.end();
97 }
98 
test_l_r_values()99 constexpr bool test_l_r_values() {
100   using namespace std::string_view_literals;
101 
102   // Both lvalues and rvalues can be used as input.
103   {
104     // Lvalues.
105     {
106       auto input = "abc"sv;
107       auto sep = " "sv;
108       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
109     }
110 
111     // Const lvalues.
112     {
113       const auto input = "abc"sv;
114       const auto sep = " "sv;
115       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
116     }
117 
118     // Rvalues.
119     {
120       auto input = "abc"sv;
121       auto sep = " "sv;
122       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
123     }
124 
125     // Const rvalues.
126     {
127       const auto input = "abc"sv;
128       const auto sep = " "sv;
129       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
130     }
131   }
132 
133   return true;
134 }
135 
test_string_literal_separator()136 constexpr bool test_string_literal_separator() {
137   using namespace std::string_view_literals;
138 
139   // Splitting works as expected when the separator is a single character literal.
140   {
141     std::ranges::lazy_split_view v("abc def"sv, ' ');
142     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
143   }
144 
145   // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is
146   // because of the implicit terminating null in the literal.
147   {
148     std::ranges::lazy_split_view v("abc def"sv, " ");
149     assert(is_equal(v, std::array{"abc def"sv}));
150   }
151 
152   // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`.
153   // Should the input string contain that two-character sequence, the separator would match.
154   {
155     std::ranges::lazy_split_view v("abc \0def"sv, " ");
156     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
157   }
158 
159   return true;
160 }
161 
// Converts `str` to a `std::string_view` so that a string literal and a `string_view` can be made to produce the same
// results (which isn't always the case when a literal is used directly, see below). For a string literal the array
// decays to a pointer, so the resulting view stops before the terminating null.
//
// The returned view does not own its characters; it must not outlive the object `str` refers to.
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
168 
169 template <class T, class Separator, class U, std::size_t M>
test_one(T && input,Separator && separator,std::array<U,M> expected)170 constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
171   assert(test_function_call(input, separator, expected));
172   assert(test_with_piping(input, separator, expected));
173 
174   // In addition to the `(ForwardView, ForwardView)` case, test the `(ForwardView, tiny-range)` and `(InputView,
175   // tiny-range)` cases (all of which have unique code paths).
176   if constexpr (std::is_same_v<std::remove_reference_t<Separator>, char>) {
177     assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected));
178     assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected));
179 
180     assert(test_function_call(InputView(input), ForwardTinyView(separator), expected));
181     assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected));
182   }
183 }
184 
test_string_literals()185 constexpr bool test_string_literals() {
186   // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected
187   // results due to the implicit terminating null that is treated as part of the range.
188 
189   using namespace std::string_view_literals;
190 
191   char short_sep = ' ';
192   auto long_sep = "12"sv;
193 
194   // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from
195   // the original range).
196   {
197     std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))};
198 
199     assert(test_function_call("abc def", short_sep, expected));
200     assert(test_with_piping("abc def", short_sep, expected));
201     assert(test_function_call("abc12def", long_sep, expected));
202     assert(test_with_piping("abc12def", long_sep, expected));
203   }
204 
205   // Empty string.
206   {
207     // Because an empty string literal contains an implicit terminating null, the output will contain one segment.
208     std::array expected = {std::string_view("", 1)};
209 
210     assert(test_function_call("", short_sep, expected));
211     assert(test_with_piping("", short_sep, expected));
212     assert(test_function_call("", long_sep, expected));
213     assert(test_with_piping("", long_sep, expected));
214   }
215 
216   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
217   // are treated differently due to the presence of an implicit `\0` in the latter.
218   {
219     const char input[] = "abc def";
220     std::array expected_unsplit = {std::string_view(input, sizeof(input))};
221     std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))};
222 
223     assert(test_function_call(input, " ", expected_unsplit));
224     assert(test_function_call("abc \0def", " ", expected_split));
225     // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`.
226   }
227 
228   // Empty separator.
229   {
230     auto empty_sep = ""sv;
231     std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv};
232 
233     assert(test_function_call("abc", empty_sep, expected));
234     assert(test_with_piping("abc", empty_sep, expected));
235   }
236 
237   return true;
238 }
239 
test_nontrivial_characters()240 bool test_nontrivial_characters() {
241   // Try a deliberately heavyweight "character" type to see if it triggers any corner cases.
242 
243   using Map = std::map<std::string, int>;
244   using Vec = std::vector<Map>;
245 
246   Map sep = {{"yyy", 999}};
247   Map m1 = {
248     {"a", 1},
249     {"bc", 2},
250   };
251   Map m2 = {
252     {"def", 3},
253   };
254   Map m3 = {
255     {"g", 4},
256     {"hijk", 5},
257   };
258 
259   Vec expected1 = {m1, m2};
260   Vec expected2 = {m3};
261 
262   std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep);
263 
264   // Segment 1: {m1, m2}
265   auto outer = v.begin();
266   assert(outer != v.end());
267   auto inner = (*outer).begin();
268   assert(*inner++ == m1);
269   assert(*inner++ == m2);
270   assert(inner == (*outer).end());
271 
272   // Segment 2: {m3}
273   ++outer;
274   assert(outer != v.end());
275   inner = (*outer).begin();
276   assert(*inner++ == m3);
277   assert(inner == (*outer).end());
278 
279   ++outer;
280   assert(outer == v.end());
281 
282   return true;
283 }
284 
main_test()285 constexpr bool main_test() {
286   using namespace std::string_view_literals;
287 
288   char short_sep = ' ';
289   auto long_sep = "12"sv;
290 
291   // One separator.
292   {
293     std::array expected = {"abc"sv, "def"sv};
294     test_one("abc def"sv, short_sep, expected);
295     test_one("abc12def"sv, long_sep, expected);
296   }
297 
298   // Several separators in a row.
299   {
300     std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv};
301     test_one("abc    def"sv, short_sep, expected);
302     test_one("abc12121212def"sv, long_sep, expected);
303   }
304 
305   // Trailing separator.
306   {
307     std::array expected = {"abc"sv, "def"sv, ""sv};
308     test_one("abc def "sv, short_sep, expected);
309     test_one("abc12def12"sv, long_sep, expected);
310   }
311 
312   // Leading separator.
313   {
314     std::array expected = {""sv, "abc"sv, "def"sv};
315     test_one(" abc def"sv, short_sep, expected);
316     test_one("12abc12def"sv, long_sep, expected);
317   }
318 
319   // No separator.
320   {
321     std::array expected = {"abc"sv};
322     test_one("abc"sv, short_sep, expected);
323     test_one("abc"sv, long_sep, expected);
324   }
325 
326   // Input consisting of a single separator.
327   {
328     std::array expected = {""sv, ""sv};
329     test_one(" "sv, short_sep, expected);
330     test_one("12"sv, long_sep, expected);
331   }
332 
333   // Input consisting of only separators.
334   {
335     std::array expected = {""sv, ""sv, ""sv, ""sv};
336     test_one("   "sv, short_sep, expected);
337     test_one("121212"sv, long_sep, expected);
338   }
339 
340   // The separator and the string use the same character only.
341   {
342     auto overlapping_sep = "aaa"sv;
343     std::array expected = {""sv, "aa"sv};
344     test_one("aaaaa"sv, overlapping_sep, expected);
345   }
346 
347   // Many redundant separators.
348   {
349     std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv};
350     test_one("  abc   def  "sv, short_sep, expected);
351     test_one("1212abc121212def1212"sv, long_sep, expected);
352   }
353 
354   // Separators after every character.
355   {
356     std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv};
357     test_one(" a b c "sv, short_sep, expected);
358     test_one("12a12b12c12"sv, long_sep, expected);
359   }
360 
361   // Overlap between the separator and the string (see https://wg21.link/lwg3505).
362   {
363     auto overlapping_sep = "ab"sv;
364     std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv};
365     test_one("aabaaababb"sv, overlapping_sep, expected);
366   }
367 
368   // Empty input.
369   {
370     std::array<std::string_view, 0> expected = {};
371     test_one(""sv, short_sep, expected);
372     test_one(""sv, long_sep, expected);
373   }
374 
375   // Empty separator.
376   {
377     auto empty_sep = ""sv;
378     std::array expected = {"a"sv, "b"sv, "c"sv};
379     test_one("abc"sv, empty_sep, expected);
380     test_one("abc"sv, empty_sep, expected);
381   }
382 
383   // Terminating null as a separator.
384   {
385     std::array expected = {"abc"sv, "def"sv};
386     test_one("abc\0def"sv, '\0', expected);
387     test_one("abc\0\0def"sv, "\0\0"sv, expected);
388   }
389 
390   // Different character types.
391   {
392     // `char`.
393     test_function_call("abc def", ' ', std::array{"abc", "def"});
394 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
395     // `wchar_t`.
396     test_function_call(L"abc def", L' ', std::array{L"abc", L"def"});
397 #endif
398     // `char8_t`.
399     test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"});
400     // `char16_t`.
401     test_function_call(u"abc def", u' ', std::array{u"abc", u"def"});
402     // `char32_t`.
403     test_function_call(U"abc def", U' ', std::array{U"abc", U"def"});
404   }
405 
406   // Non-character input.
407   {
408     std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}};
409     test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected);
410     test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected);
411   }
412 
413   return true;
414 }
415 
416 int main(int, char**) {
417   main_test();
418   static_assert(main_test());
419 
420   test_string_literals();
421   static_assert(test_string_literals());
422 
423   test_l_r_values();
424   static_assert(test_l_r_values());
425 
426   test_string_literal_separator();
427   static_assert(test_string_literal_separator());
428 
429   // Note: map is not `constexpr`, so this test is runtime-only.
430   test_nontrivial_characters();
431 
432   return 0;
433 }
434