xref: /llvm-project/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp (revision b8cb1dc9ea87faa8e8e9ab7a31710a8c0bb8b084)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 
11 // template <class View, class Pattern>
12 // class std::ranges::lazy_split_view;
13 //
14 // These tests check the output `lazy_split_view` produces for a variety of inputs, including many corner cases, with no
15 // restrictions on which member functions can be called.
16 
17 #include <ranges>
18 
19 #include <algorithm>
20 #include <array>
21 #include <cassert>
22 #include <map>
23 #include <string>
24 #include <string_view>
25 #include <utility>
26 #include <vector>
27 #include "types.h"
28 
29 // A constexpr-friendly lightweight string, primarily useful for comparisons.
30 // Unlike `std::string_view`, it copies the given string into an
31 // internal buffer and can work with non-contiguous inputs.
32 template <class Char>
33 class BasicSmallString {
34   std::basic_string<Char> buffer_{};
35 
36 public:
37   constexpr BasicSmallString(std::basic_string_view<Char> v) : buffer_(v) {}
38 
39   template <class I, class S>
40   constexpr BasicSmallString(I b, const S& e) {
41     for (; b != e; ++b) {
42       buffer_ += *b;
43     }
44   }
45 
46   template <std::ranges::range R>
47   constexpr BasicSmallString(R&& from) : BasicSmallString(from.begin(), from.end()) {}
48 
49   friend constexpr bool operator==(const BasicSmallString& lhs, const BasicSmallString& rhs) {
50     return lhs.buffer_ == rhs.buffer_;
51   }
52 };
53 
54 template <std::ranges::view View, std::ranges::range Expected>
55 constexpr bool is_equal(View& view, const Expected& expected) {
56   using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>;
57   using Str = BasicSmallString<Char>;
58 
59   auto actual_it = view.begin();
60   auto expected_it = expected.begin();
61   for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) {
62     if (Str(*actual_it) != Str(*expected_it))
63       return false;
64   }
65 
66   return actual_it == view.end() && expected_it == expected.end();
67 }
68 
69 template <class T, class Separator, class U, size_t M>
70 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) {
71   std::ranges::lazy_split_view v(input, separator);
72   return is_equal(v, expected);
73 }
74 
75 template <class T, class Separator, class U, size_t M>
76 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) {
77   auto expected_it = expected.begin();
78   for (auto e : input | std::ranges::views::lazy_split(separator)) {
79     if (expected_it == expected.end())
80       return false;
81     if (!std::ranges::equal(e, *expected_it))
82       return false;
83 
84     ++expected_it;
85   }
86 
87   return expected_it == expected.end();
88 }
89 
90 constexpr bool test_l_r_values() {
91   using namespace std::string_view_literals;
92 
93   // Both lvalues and rvalues can be used as input.
94   {
95     // Lvalues.
96     {
97       auto input = "abc"sv;
98       auto sep = " "sv;
99       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
100     }
101 
102     // Const lvalues.
103     {
104       const auto input = "abc"sv;
105       const auto sep = " "sv;
106       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
107     }
108 
109     // Rvalues.
110     {
111       auto input = "abc"sv;
112       auto sep = " "sv;
113       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
114     }
115 
116     // Const rvalues.
117     {
118       const auto input = "abc"sv;
119       const auto sep = " "sv;
120       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
121     }
122   }
123 
124   return true;
125 }
126 
127 constexpr bool test_string_literal_separator() {
128   using namespace std::string_view_literals;
129 
130   // Splitting works as expected when the separator is a single character literal.
131   {
132     std::ranges::lazy_split_view v("abc def"sv, ' ');
133     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
134   }
135 
136   // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is
137   // because of the implicit terminating null in the literal.
138   {
139     std::ranges::lazy_split_view v("abc def"sv, " ");
140     assert(is_equal(v, std::array{"abc def"sv}));
141   }
142 
143   // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`.
144   // Should the input string contain that two-character sequence, the separator would match.
145   {
146     std::ranges::lazy_split_view v("abc \0def"sv, " ");
147     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
148   }
149 
150   return true;
151 }
152 
// Helper for checking that a string literal and a `string_view` produce the same results (which isn't always the
// case when the literal is used as a range directly). It builds a `std::string_view` from `str`; for a string
// literal the view ends at the first null character, so the implicit terminating null is not part of the result.
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
159 
// Runs one input/separator/expected triple through both construction styles (direct construction and piping).
// When the separator is a plain `char`, the same expectation is additionally checked with the helper view types
// from "types.h" wrapped around the input and the separator.
template <class T, class Separator, class U, size_t M>
constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
  assert(test_function_call(input, separator, expected));
  assert(test_with_piping(input, separator, expected));

  // In addition to the `(ForwardView, ForwardView)` case above, cover the `(ForwardView, tiny-range)` and
  // `(InputView, tiny-range)` cases, all of which have unique code paths.
  constexpr bool separator_is_char = std::is_same_v<std::remove_reference_t<Separator>, char>;
  if constexpr (separator_is_char) {
    // Forward input with a tiny-range separator.
    assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected));

    // Input-only input with a tiny-range separator.
    assert(test_function_call(InputView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected));
  }
}
175 
176 constexpr bool test_string_literals() {
177   // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected
178   // results due to the implicit terminating null that is treated as part of the range.
179 
180   using namespace std::string_view_literals;
181 
182   char short_sep = ' ';
183   auto long_sep = "12"sv;
184 
185   // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from
186   // the original range).
187   {
188     std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))};
189 
190     assert(test_function_call("abc def", short_sep, expected));
191     assert(test_with_piping("abc def", short_sep, expected));
192     assert(test_function_call("abc12def", long_sep, expected));
193     assert(test_with_piping("abc12def", long_sep, expected));
194   }
195 
196   // Empty string.
197   {
198     // Because an empty string literal contains an implicit terminating null, the output will contain one segment.
199     std::array expected = {std::string_view("", 1)};
200 
201     assert(test_function_call("", short_sep, expected));
202     assert(test_with_piping("", short_sep, expected));
203     assert(test_function_call("", long_sep, expected));
204     assert(test_with_piping("", long_sep, expected));
205   }
206 
207   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
208   // are treated differently due to the presence of an implicit `\0` in the latter.
209   {
210     const char input[] = "abc def";
211     std::array expected_unsplit = {std::string_view(input, sizeof(input))};
212     std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))};
213 
214     assert(test_function_call(input, " ", expected_unsplit));
215     assert(test_function_call("abc \0def", " ", expected_split));
216     // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`.
217   }
218 
219   // Empty separator.
220   {
221     auto empty_sep = ""sv;
222     std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv};
223 
224     assert(test_function_call("abc", empty_sep, expected));
225     assert(test_with_piping("abc", empty_sep, expected));
226   }
227 
228   return true;
229 }
230 
231 bool test_nontrivial_characters() {
232   // Try a deliberately heavyweight "character" type to see if it triggers any corner cases.
233 
234   using Map = std::map<std::string, int>;
235   using Vec = std::vector<Map>;
236 
237   Map sep = {{"yyy", 999}};
238   Map m1 = {
239     {"a", 1},
240     {"bc", 2},
241   };
242   Map m2 = {
243     {"def", 3},
244   };
245   Map m3 = {
246     {"g", 4},
247     {"hijk", 5},
248   };
249 
250   Vec expected1 = {m1, m2};
251   Vec expected2 = {m3};
252 
253   std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep);
254 
255   // Segment 1: {m1, m2}
256   auto outer = v.begin();
257   assert(outer != v.end());
258   auto inner = (*outer).begin();
259   assert(*inner++ == m1);
260   assert(*inner++ == m2);
261   assert(inner == (*outer).end());
262 
263   // Segment 2: {m3}
264   ++outer;
265   assert(outer != v.end());
266   inner = (*outer).begin();
267   assert(*inner++ == m3);
268   assert(inner == (*outer).end());
269 
270   ++outer;
271   assert(outer == v.end());
272 
273   return true;
274 }
275 
276 constexpr bool main_test() {
277   using namespace std::string_view_literals;
278 
279   char short_sep = ' ';
280   auto long_sep = "12"sv;
281 
282   // One separator.
283   {
284     std::array expected = {"abc"sv, "def"sv};
285     test_one("abc def"sv, short_sep, expected);
286     test_one("abc12def"sv, long_sep, expected);
287   }
288 
289   // Several separators in a row.
290   {
291     std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv};
292     test_one("abc    def"sv, short_sep, expected);
293     test_one("abc12121212def"sv, long_sep, expected);
294   }
295 
296   // Trailing separator.
297   {
298     std::array expected = {"abc"sv, "def"sv, ""sv};
299     test_one("abc def "sv, short_sep, expected);
300     test_one("abc12def12"sv, long_sep, expected);
301   }
302 
303   // Leading separator.
304   {
305     std::array expected = {""sv, "abc"sv, "def"sv};
306     test_one(" abc def"sv, short_sep, expected);
307     test_one("12abc12def"sv, long_sep, expected);
308   }
309 
310   // No separator.
311   {
312     std::array expected = {"abc"sv};
313     test_one("abc"sv, short_sep, expected);
314     test_one("abc"sv, long_sep, expected);
315   }
316 
317   // Input consisting of a single separator.
318   {
319     std::array expected = {""sv, ""sv};
320     test_one(" "sv, short_sep, expected);
321     test_one("12"sv, long_sep, expected);
322   }
323 
324   // Input consisting of only separators.
325   {
326     std::array expected = {""sv, ""sv, ""sv, ""sv};
327     test_one("   "sv, short_sep, expected);
328     test_one("121212"sv, long_sep, expected);
329   }
330 
331   // The separator and the string use the same character only.
332   {
333     auto overlapping_sep = "aaa"sv;
334     std::array expected = {""sv, "aa"sv};
335     test_one("aaaaa"sv, overlapping_sep, expected);
336   }
337 
338   // Many redundant separators.
339   {
340     std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv};
341     test_one("  abc   def  "sv, short_sep, expected);
342     test_one("1212abc121212def1212"sv, long_sep, expected);
343   }
344 
345   // Separators after every character.
346   {
347     std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv};
348     test_one(" a b c "sv, short_sep, expected);
349     test_one("12a12b12c12"sv, long_sep, expected);
350   }
351 
352   // Overlap between the separator and the string (see https://wg21.link/lwg3505).
353   {
354     auto overlapping_sep = "ab"sv;
355     std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv};
356     test_one("aabaaababb"sv, overlapping_sep, expected);
357   }
358 
359   // Empty input.
360   {
361     std::array<std::string_view, 0> expected = {};
362     test_one(""sv, short_sep, expected);
363     test_one(""sv, long_sep, expected);
364   }
365 
366   // Empty separator.
367   {
368     auto empty_sep = ""sv;
369     std::array expected = {"a"sv, "b"sv, "c"sv};
370     test_one("abc"sv, empty_sep, expected);
371     test_one("abc"sv, empty_sep, expected);
372   }
373 
374   // Terminating null as a separator.
375   {
376     std::array expected = {"abc"sv, "def"sv};
377     test_one("abc\0def"sv, '\0', expected);
378     test_one("abc\0\0def"sv, "\0\0"sv, expected);
379   }
380 
381   // Different character types.
382   {
383     // `char`.
384     test_function_call("abc def", ' ', std::array{"abc", "def"});
385 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
386     // `wchar_t`.
387     test_function_call(L"abc def", L' ', std::array{L"abc", L"def"});
388 #endif
389     // `char8_t`.
390     test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"});
391     // `char16_t`.
392     test_function_call(u"abc def", u' ', std::array{u"abc", u"def"});
393     // `char32_t`.
394     test_function_call(U"abc def", U' ', std::array{U"abc", U"def"});
395   }
396 
397   // Non-character input.
398   {
399     std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}};
400     test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected);
401     test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected);
402   }
403 
404   return true;
405 }
406 
407 int main(int, char**) {
408   main_test();
409   static_assert(main_test());
410 
411   test_string_literals();
412   static_assert(test_string_literals());
413 
414   test_l_r_values();
415   static_assert(test_l_r_values());
416 
417   test_string_literal_separator();
418   static_assert(test_string_literal_separator());
419 
420   // Note: map is not `constexpr`, so this test is runtime-only.
421   test_nontrivial_characters();
422 
423   return 0;
424 }
425