xref: /llvm-project/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp (revision fb855eb941b6d740cc6560297d0b4d3201dcaf9f)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 
11 // Some basic examples of how split_view might be used in the wild. This is a general
12 // collection of sample algorithms and functions that try to mock general usage of
13 // this view.
14 
// These tests check the output `split_view` produces for a variety of inputs, including many corner cases, with no
// restrictions on which member functions can be called.
17 
18 #include <algorithm>
19 #include <array>
20 #include <cassert>
21 #include <concepts>
22 #include <map>
23 #include <ranges>
24 #include <string>
25 #include <string_view>
26 #include <utility>
27 #include <vector>
28 
29 #include "test_macros.h"
30 
31 template <std::ranges::view View, std::ranges::range Expected>
is_equal(View & view,const Expected & expected)32 constexpr bool is_equal(View& view, const Expected& expected) {
33   return std::ranges::equal(view, expected, std::ranges::equal);
34 }
35 
36 template <class T, class Separator, class U, std::size_t M>
test_function_call(T && input,Separator && separator,std::array<U,M> expected)37 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) {
38   std::ranges::split_view v(input, separator);
39   return is_equal(v, expected);
40 }
41 
42 template <class T, class Separator, class U, std::size_t M>
test_with_piping(T && input,Separator && separator,std::array<U,M> expected)43 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) {
44   auto expected_it = expected.begin();
45   for (auto e : input | std::ranges::views::split(separator)) {
46     if (expected_it == expected.end())
47       return false;
48     if (!std::ranges::equal(e, *expected_it))
49       return false;
50 
51     ++expected_it;
52   }
53 
54   return expected_it == expected.end();
55 }
56 
test_l_r_values()57 constexpr bool test_l_r_values() {
58   using namespace std::string_view_literals;
59 
60   // Both lvalues and rvalues can be used as input.
61   {
62     // Lvalues.
63     {
64       auto input = "abc"sv;
65       auto sep   = " "sv;
66       [[maybe_unused]] std::ranges::split_view v(input, sep);
67     }
68 
69     // Const lvalues.
70     {
71       const auto input = "abc"sv;
72       const auto sep   = " "sv;
73       [[maybe_unused]] std::ranges::split_view v(input, sep);
74     }
75 
76     // Rvalues.
77     {
78       auto input = "abc"sv;
79       auto sep   = " "sv;
80       [[maybe_unused]] std::ranges::split_view v(std::move(input), std::move(sep));
81     }
82 
83     // Const rvalues.
84     {
85       const auto input = "abc"sv;
86       const auto sep   = " "sv;
87       [[maybe_unused]] std::ranges::split_view v(std::move(input), std::move(sep));
88     }
89   }
90 
91   return true;
92 }
93 
test_string_literal_separator()94 constexpr bool test_string_literal_separator() {
95   using namespace std::string_view_literals;
96 
97   // Splitting works as expected when the separator is a single character literal.
98   {
99     std::ranges::split_view v("abc def"sv, ' ');
100     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
101   }
102 
103   // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is
104   // because of the implicit terminating null in the literal.
105   {
106     std::ranges::split_view v("abc def"sv, " ");
107     assert(is_equal(v, std::array{"abc def"sv}));
108   }
109 
110   // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`.
111   // Should the input string contain that two-character sequence, the separator would match.
112   {
113     std::ranges::split_view v("abc \0def"sv, " ");
114     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
115   }
116 
117   return true;
118 }
119 
// Converts `str` to a `std::string_view` through explicit construction.
//
// This lets a test feed the same text to `split_view` both as a string literal and as a `string_view`. The two are
// not always interchangeable: a string literal is an array that includes its terminating null, whereas the view
// built here from a literal stops before the null.
//
// The returned view refers to the characters of `str`; it must not outlive them.
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
126 
// Asserts that splitting `input` on `separator` yields `expected`, checking both ways of building the view:
// first the direct `split_view` construction, then the `views::split` pipe syntax.
template <class T, class Separator, class U, std::size_t M>
constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
  // Direct construction: `std::ranges::split_view v(input, separator)`.
  assert(test_function_call(input, separator, expected));

  // Range adaptor: `input | std::ranges::views::split(separator)`.
  assert(test_with_piping(input, separator, expected));
}
132 
test_string_literals()133 constexpr bool test_string_literals() {
134   // These tests show characteristic examples of how using string literals with `split_view` produces unexpected
135   // results due to the implicit terminating null that is treated as part of the range.
136 
137   using namespace std::string_view_literals;
138 
139   char short_sep = ' ';
140   auto long_sep  = "12"sv;
141 
142   // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from
143   // the original range).
144   {
145     std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))};
146 
147     assert(test_function_call("abc def", short_sep, expected));
148     assert(test_with_piping("abc def", short_sep, expected));
149     assert(test_function_call("abc12def", long_sep, expected));
150     assert(test_with_piping("abc12def", long_sep, expected));
151   }
152 
153   // Empty string.
154   {
155     // Because an empty string literal contains an implicit terminating null, the output will contain one segment.
156     std::array expected = {std::string_view("", 1)};
157 
158     assert(test_function_call("", short_sep, expected));
159     assert(test_with_piping("", short_sep, expected));
160     assert(test_function_call("", long_sep, expected));
161     assert(test_with_piping("", long_sep, expected));
162   }
163 
164   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
165   // are treated differently due to the presence of an implicit `\0` in the latter.
166   {
167     const char input[]          = "abc def";
168     std::array expected_unsplit = {std::string_view(input, sizeof(input))};
169     std::array expected_split   = {"abc"sv, std::string_view("def", sizeof("def"))};
170 
171     assert(test_function_call(input, " ", expected_unsplit));
172     assert(test_function_call("abc \0def", " ", expected_split));
173     // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`.
174   }
175 
176   // Empty separator.
177   {
178     auto empty_sep      = ""sv;
179     std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv};
180 
181     assert(test_function_call("abc", empty_sep, expected));
182     assert(test_with_piping("abc", empty_sep, expected));
183   }
184 
185   return true;
186 }
187 
test_nontrivial_characters()188 bool test_nontrivial_characters() {
189   // Try a deliberately heavyweight "character" type to see if it triggers any corner cases.
190 
191   using Map = std::map<std::string, int>;
192   using Vec = std::vector<Map>;
193 
194   Map sep = {{"yyy", 999}};
195   Map m1  = {
196       {"a", 1},
197       {"bc", 2},
198   };
199   Map m2 = {
200       {"def", 3},
201   };
202   Map m3 = {
203       {"g", 4},
204       {"hijk", 5},
205   };
206 
207   Vec expected1 = {m1, m2};
208   Vec expected2 = {m3};
209 
210   std::ranges::split_view v(Vec{m1, m2, sep, m3}, sep);
211 
212   // Segment 1: {m1, m2}
213   auto outer = v.begin();
214   assert(outer != v.end());
215   auto inner = (*outer).begin();
216   assert(*inner++ == m1);
217   assert(*inner++ == m2);
218   assert(inner == (*outer).end());
219 
220   // Segment 2: {m3}
221   ++outer;
222   assert(outer != v.end());
223   inner = (*outer).begin();
224   assert(*inner++ == m3);
225   assert(inner == (*outer).end());
226 
227   ++outer;
228   assert(outer == v.end());
229 
230   return true;
231 }
232 
main_test()233 constexpr bool main_test() {
234   using namespace std::string_view_literals;
235 
236   char short_sep = ' ';
237   auto long_sep  = "12"sv;
238 
239   // One separator.
240   {
241     std::array expected = {"abc"sv, "def"sv};
242     test_one("abc def"sv, short_sep, expected);
243     test_one("abc12def"sv, long_sep, expected);
244   }
245 
246   // Several separators in a row.
247   {
248     std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv};
249     test_one("abc    def"sv, short_sep, expected);
250     test_one("abc12121212def"sv, long_sep, expected);
251   }
252 
253   // Trailing separator.
254   {
255     std::array expected = {"abc"sv, "def"sv, ""sv};
256     test_one("abc def "sv, short_sep, expected);
257     test_one("abc12def12"sv, long_sep, expected);
258   }
259 
260   // Leading separator.
261   {
262     std::array expected = {""sv, "abc"sv, "def"sv};
263     test_one(" abc def"sv, short_sep, expected);
264     test_one("12abc12def"sv, long_sep, expected);
265   }
266 
267   // No separator.
268   {
269     std::array expected = {"abc"sv};
270     test_one("abc"sv, short_sep, expected);
271     test_one("abc"sv, long_sep, expected);
272   }
273 
274   // Input consisting of a single separator.
275   {
276     std::array expected = {""sv, ""sv};
277     test_one(" "sv, short_sep, expected);
278     test_one("12"sv, long_sep, expected);
279   }
280 
281   // Input consisting of only separators.
282   {
283     std::array expected = {""sv, ""sv, ""sv, ""sv};
284     test_one("   "sv, short_sep, expected);
285     test_one("121212"sv, long_sep, expected);
286   }
287 
288   // The separator and the string use the same character only.
289   {
290     auto overlapping_sep = "aaa"sv;
291     std::array expected  = {""sv, "aa"sv};
292     test_one("aaaaa"sv, overlapping_sep, expected);
293   }
294 
295   // Many redundant separators.
296   {
297     std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv};
298     test_one("  abc   def  "sv, short_sep, expected);
299     test_one("1212abc121212def1212"sv, long_sep, expected);
300   }
301 
302   // Separators after every character.
303   {
304     std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv};
305     test_one(" a b c "sv, short_sep, expected);
306     test_one("12a12b12c12"sv, long_sep, expected);
307   }
308 
309   // Overlap between the separator and the string (see https://wg21.link/lwg3505).
310   {
311     auto overlapping_sep = "ab"sv;
312     std::array expected  = {"a"sv, "aa"sv, ""sv, "b"sv};
313     test_one("aabaaababb"sv, overlapping_sep, expected);
314   }
315 
316   // Empty input.
317   {
318     std::array<std::string_view, 0> expected = {};
319     test_one(""sv, short_sep, expected);
320     test_one(""sv, long_sep, expected);
321   }
322 
323   // Empty separator.
324   {
325     auto empty_sep      = ""sv;
326     std::array expected = {"a"sv, "b"sv, "c"sv};
327     test_one("abc"sv, empty_sep, expected);
328     test_one("abc"sv, empty_sep, expected);
329   }
330 
331   // Terminating null as a separator.
332   {
333     std::array expected = {"abc"sv, "def"sv};
334     test_one("abc\0def"sv, '\0', expected);
335     test_one("abc\0\0def"sv, "\0\0"sv, expected);
336   }
337 
338   // Different character types.
339   {
340     // `char`.
341     test_function_call("abc def", ' ', std::array{"abc"sv, "def"sv});
342 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
343     // `wchar_t`.
344     test_function_call(L"abc def", L' ', std::array{L"abc"sv, L"def"sv});
345 #endif
346     // `char8_t`.
347     test_function_call(u8"abc def", u8' ', std::array{u8"abc"sv, u8"def"sv});
348     // `char16_t`.
349     test_function_call(u"abc def", u' ', std::array{u"abc"sv, u"def"sv});
350     // `char32_t`.
351     test_function_call(U"abc def", U' ', std::array{U"abc"sv, U"def"sv});
352   }
353 
354   // Non-character input.
355   {
356     std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}};
357     test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected);
358     test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected);
359   }
360 
361   return true;
362 }
363 
364 constexpr bool example_test() {
365   // example code in the spec
366   std::string str{"the quick brown fox"};
367   std::vector<std::string_view> result;
368   for (auto r : std::views::split(str, ' ')) {
369     result.emplace_back(r.begin(), r.end());
370   }
371   using namespace std::string_view_literals;
372   auto expected = {"the"sv, "quick"sv, "brown"sv, "fox"sv};
373   assert(std::ranges::equal(result, expected));
374 
375   return true;
376 }
377 
378 int main(int, char**) {
379   example_test();
380   static_assert(example_test());
381 
382   test_string_literals();
383   static_assert(test_string_literals());
384 
385   test_l_r_values();
386   static_assert(test_l_r_values());
387 
388   test_string_literal_separator();
389   static_assert(test_string_literal_separator());
390 
391   // Note: map is not `constexpr`, so this test is runtime-only.
392   test_nontrivial_characters();
393 
394   return 0;
395 }
396