xref: /llvm-project/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp (revision e53c461bf3f0feebb4fd6b43e05a0047f8edb945)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 // UNSUPPORTED: libcpp-has-no-incomplete-ranges
11 
12 // template <class View, class Pattern>
13 // class std::ranges::lazy_split_view;
14 //
// These tests check the output `lazy_split_view` produces for a variety of inputs, including many corner cases, with
// no restrictions on which member functions can be called.
17 
18 #include <ranges>
19 
20 #include <algorithm>
21 #include <array>
22 #include <cassert>
23 #include <map>
24 #include <string>
25 #include <string_view>
26 #include <utility>
27 #include <vector>
28 #include "small_string.h"
29 #include "types.h"
30 
31 template <std::ranges::view View, std::ranges::range Expected>
32 constexpr bool is_equal(View& view, const Expected& expected) {
33   using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>;
34   using Str = BasicSmallString<Char>;
35 
36   auto actual_it = view.begin();
37   auto expected_it = expected.begin();
38   for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) {
39     if (Str(*actual_it) != Str(*expected_it))
40       return false;
41   }
42 
43   return actual_it == view.end() && expected_it == expected.end();
44 }
45 
46 template <class T, class Separator, class U, size_t M>
47 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) {
48   std::ranges::lazy_split_view v(input, separator);
49   return is_equal(v, expected);
50 }
51 
52 template <class T, class Separator, class U, size_t M>
53 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) {
54   auto expected_it = expected.begin();
55   for (auto e : input | std::ranges::views::lazy_split(separator)) {
56     if (expected_it == expected.end())
57       return false;
58     if (SmallString(e) != *expected_it)
59       return false;
60 
61     ++expected_it;
62   }
63 
64   return expected_it == expected.end();
65 }
66 
67 constexpr bool test_l_r_values() {
68   using namespace std::string_view_literals;
69 
70   // Both lvalues and rvalues can be used as input.
71   {
72     // Lvalues.
73     {
74       auto input = "abc"sv;
75       auto sep = " "sv;
76       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
77     }
78 
79     // Const lvalues.
80     {
81       const auto input = "abc"sv;
82       const auto sep = " "sv;
83       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
84     }
85 
86     // Rvalues.
87     {
88       auto input = "abc"sv;
89       auto sep = " "sv;
90       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
91     }
92 
93     // Const rvalues.
94     {
95       const auto input = "abc"sv;
96       const auto sep = " "sv;
97       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
98     }
99   }
100 
101   return true;
102 }
103 
104 constexpr bool test_string_literal_separator() {
105   using namespace std::string_view_literals;
106 
107   // Splitting works as expected when the separator is a single character literal.
108   {
109     std::ranges::lazy_split_view v("abc def"sv, ' ');
110     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
111   }
112 
113   // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is
114   // because of the implicit terminating null in the literal.
115   {
116     std::ranges::lazy_split_view v("abc def"sv, " ");
117     assert(is_equal(v, std::array{"abc def"sv}));
118   }
119 
120   // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`.
121   // Should the input string contain that two-character sequence, the separator would match.
122   {
123     std::ranges::lazy_split_view v("abc \0def"sv, " ");
124     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
125   }
126 
127   return true;
128 }
129 
// Converts `str` to a `std::string_view`. This is a helper for making sure that a string literal and a `string_view`
// produce the same results (which isn't always the case, see `test_string_literals`).
//
// Note that for a character array the view is built through the `const char*` constructor, so it ends at the first
// null character and never includes the implicit terminating null of a string literal.
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
136 
// Runs a single test case through both ways of creating a `lazy_split_view` (direct construction and piping) and
// asserts that splitting `input` by `separator` produces `expected`.
template <class T, class Separator, class U, size_t M>
constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
  // The `(ForwardView, ForwardView)` case.
  assert(test_function_call(input, separator, expected));
  assert(test_with_piping(input, separator, expected));

  // The `(ForwardView, tiny-range)` and `(InputView, tiny-range)` cases have code paths of their own, so cover them
  // too. They are only exercised when the separator is a single `char`.
  using SeparatorValue = std::remove_reference_t<Separator>;
  if constexpr (std::is_same_v<SeparatorValue, char>) {
    // `(ForwardView, tiny-range)`.
    assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected));

    // `(InputView, tiny-range)`.
    assert(test_function_call(InputView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected));
  }
}
152 
153 constexpr bool test_string_literals() {
154   // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected
155   // results due to the implicit terminating null that is treated as part of the range.
156 
157   using namespace std::string_view_literals;
158 
159   char short_sep = ' ';
160   auto long_sep = "12"sv;
161 
162   // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from
163   // the original range).
164   {
165     std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))};
166 
167     assert(test_function_call("abc def", short_sep, expected));
168     assert(test_with_piping("abc def", short_sep, expected));
169     assert(test_function_call("abc12def", long_sep, expected));
170     assert(test_with_piping("abc12def", long_sep, expected));
171   }
172 
173   // Empty string.
174   {
175     // Because an empty string literal contains an implicit terminating null, the output will contain one segment.
176     std::array expected = {std::string_view("", 1)};
177 
178     assert(test_function_call("", short_sep, expected));
179     assert(test_with_piping("", short_sep, expected));
180     assert(test_function_call("", long_sep, expected));
181     assert(test_with_piping("", long_sep, expected));
182   }
183 
184   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
185   // are treated differently due to the presence of an implicit `\0` in the latter.
186   {
187     const char input[] = "abc def";
188     std::array expected_unsplit = {std::string_view(input, sizeof(input))};
189     std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))};
190 
191     assert(test_function_call(input, " ", expected_unsplit));
192     assert(test_function_call("abc \0def", " ", expected_split));
193     // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`.
194   }
195 
196   // Empty separator.
197   {
198     auto empty_sep = ""sv;
199     std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv};
200 
201     assert(test_function_call("abc", empty_sep, expected));
202     assert(test_with_piping("abc", empty_sep, expected));
203   }
204 
205   return true;
206 }
207 
208 bool test_nontrivial_characters() {
209   // Try a deliberately heavyweight "character" type to see if it triggers any corner cases.
210 
211   using Map = std::map<std::string, int>;
212   using Vec = std::vector<Map>;
213 
214   Map sep = {{"yyy", 999}};
215   Map m1 = {
216     {"a", 1},
217     {"bc", 2},
218   };
219   Map m2 = {
220     {"def", 3},
221   };
222   Map m3 = {
223     {"g", 4},
224     {"hijk", 5},
225   };
226 
227   Vec expected1 = {m1, m2};
228   Vec expected2 = {m3};
229 
230   std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep);
231 
232   // Segment 1: {m1, m2}
233   auto outer = v.begin();
234   assert(outer != v.end());
235   auto inner = (*outer).begin();
236   assert(*inner++ == m1);
237   assert(*inner++ == m2);
238   assert(inner == (*outer).end());
239 
240   // Segment 2: {m3}
241   ++outer;
242   assert(outer != v.end());
243   inner = (*outer).begin();
244   assert(*inner++ == m3);
245   assert(inner == (*outer).end());
246 
247   ++outer;
248   assert(outer == v.end());
249 
250   return true;
251 }
252 
253 constexpr bool main_test() {
254   using namespace std::string_view_literals;
255 
256   char short_sep = ' ';
257   auto long_sep = "12"sv;
258 
259   // One separator.
260   {
261     std::array expected = {"abc"sv, "def"sv};
262     test_one("abc def"sv, short_sep, expected);
263     test_one("abc12def"sv, long_sep, expected);
264   }
265 
266   // Several separators in a row.
267   {
268     std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv};
269     test_one("abc    def"sv, short_sep, expected);
270     test_one("abc12121212def"sv, long_sep, expected);
271   }
272 
273   // Trailing separator.
274   {
275     std::array expected = {"abc"sv, "def"sv, ""sv};
276     test_one("abc def "sv, short_sep, expected);
277     test_one("abc12def12"sv, long_sep, expected);
278   }
279 
280   // Leading separator.
281   {
282     std::array expected = {""sv, "abc"sv, "def"sv};
283     test_one(" abc def"sv, short_sep, expected);
284     test_one("12abc12def"sv, long_sep, expected);
285   }
286 
287   // No separator.
288   {
289     std::array expected = {"abc"sv};
290     test_one("abc"sv, short_sep, expected);
291     test_one("abc"sv, long_sep, expected);
292   }
293 
294   // Input consisting of a single separator.
295   {
296     std::array expected = {""sv, ""sv};
297     test_one(" "sv, short_sep, expected);
298     test_one("12"sv, long_sep, expected);
299   }
300 
301   // Input consisting of only separators.
302   {
303     std::array expected = {""sv, ""sv, ""sv, ""sv};
304     test_one("   "sv, short_sep, expected);
305     test_one("121212"sv, long_sep, expected);
306   }
307 
308   // The separator and the string use the same character only.
309   {
310     auto overlapping_sep = "aaa"sv;
311     std::array expected = {""sv, "aa"sv};
312     test_one("aaaaa"sv, overlapping_sep, expected);
313   }
314 
315   // Many redundant separators.
316   {
317     std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv};
318     test_one("  abc   def  "sv, short_sep, expected);
319     test_one("1212abc121212def1212"sv, long_sep, expected);
320   }
321 
322   // Separators after every character.
323   {
324     std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv};
325     test_one(" a b c "sv, short_sep, expected);
326     test_one("12a12b12c12"sv, long_sep, expected);
327   }
328 
329   // Overlap between the separator and the string (see https://wg21.link/lwg3505).
330   {
331     auto overlapping_sep = "ab"sv;
332     std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv};
333     test_one("aabaaababb"sv, overlapping_sep, expected);
334   }
335 
336   // Empty input.
337   {
338     std::array<std::string_view, 0> expected = {};
339     test_one(""sv, short_sep, expected);
340     test_one(""sv, long_sep, expected);
341   }
342 
343   // Empty separator.
344   {
345     auto empty_sep = ""sv;
346     std::array expected = {"a"sv, "b"sv, "c"sv};
347     test_one("abc"sv, empty_sep, expected);
348     test_one("abc"sv, empty_sep, expected);
349   }
350 
351   // Terminating null as a separator.
352   {
353     std::array expected = {"abc"sv, "def"sv};
354     test_one("abc\0def"sv, '\0', expected);
355     test_one("abc\0\0def"sv, "\0\0"sv, expected);
356   }
357 
358   // Different character types.
359   {
360     // `char`.
361     test_function_call("abc def", ' ', std::array{"abc", "def"});
362 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
363     // `wchar_t`.
364     test_function_call(L"abc def", L' ', std::array{L"abc", L"def"});
365 #endif
366     // `char8_t`.
367     test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"});
368     // `char16_t`.
369     test_function_call(u"abc def", u' ', std::array{u"abc", u"def"});
370     // `char32_t`.
371     test_function_call(U"abc def", U' ', std::array{U"abc", U"def"});
372   }
373 
374   // Non-character input.
375   {
376     std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}};
377     test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected);
378     test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected);
379   }
380 
381   return true;
382 }
383 
384 int main(int, char**) {
385   main_test();
386   static_assert(main_test());
387 
388   test_string_literals();
389   static_assert(test_string_literals());
390 
391   test_l_r_values();
392   static_assert(test_l_r_values());
393 
394   test_string_literal_separator();
395   static_assert(test_string_literal_separator());
396 
397   // Note: map is not `constexpr`, so this test is runtime-only.
398   test_nontrivial_characters();
399 
400   return 0;
401 }
402