xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/pstl/algorithm_impl.h (revision 53d1339bf7f9c7367b35a9e1ebe693f9b047a47b)
1 // -*- C++ -*-
2 //===-- algorithm_impl.h --------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef __PSTL_algorithm_impl_H
11 #define __PSTL_algorithm_impl_H
12 
13 #include <iterator>
14 #include <type_traits>
15 #include <utility>
16 #include <functional>
17 #include <algorithm>
18 
19 #include "execution_impl.h"
20 #include "memory_impl.h"
21 #include "parallel_backend_utils.h"
22 #include "unseq_backend_simd.h"
23 
24 #if __PSTL_USE_PAR_POLICIES
25 #include "parallel_backend.h"
26 #include "parallel_impl.h"
27 #endif
28 
29 namespace __pstl
30 {
31 namespace __internal
32 {
33 
34 //------------------------------------------------------------------------
35 // any_of
36 //------------------------------------------------------------------------
37 
38 template <class _ForwardIterator, class _Pred>
39 bool
40 __brick_any_of(const _ForwardIterator __first, const _ForwardIterator __last, _Pred __pred,
41                /*__is_vector=*/std::false_type) noexcept
42 {
43     return std::any_of(__first, __last, __pred);
44 };
45 
46 template <class _ForwardIterator, class _Pred>
47 bool
48 __brick_any_of(const _ForwardIterator __first, const _ForwardIterator __last, _Pred __pred,
49                /*__is_vector=*/std::true_type) noexcept
50 {
51     return __unseq_backend::__simd_or(__first, __last - __first, __pred);
52 };
53 
54 template <class _ExecutionPolicy, class _ForwardIterator, class _Pred, class _IsVector>
55 bool
56 __pattern_any_of(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred,
57                  _IsVector __is_vector, /*parallel=*/std::false_type) noexcept
58 {
59     return __internal::__brick_any_of(__first, __last, __pred, __is_vector);
60 }
61 
62 #if __PSTL_USE_PAR_POLICIES
63 template <class _ExecutionPolicy, class _ForwardIterator, class _Pred, class _IsVector>
64 bool
65 __pattern_any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred,
66                  _IsVector __is_vector, /*parallel=*/std::true_type)
67 {
68     return __internal::__except_handler([&]() {
69        return __internal::__parallel_or(std::forward<_ExecutionPolicy>(__exec), __first, __last,
70                              [__pred, __is_vector](_ForwardIterator __i, _ForwardIterator __j) {
71                                  return __internal::__brick_any_of(__i, __j, __pred, __is_vector);
72                              });
73     });
74 }
75 #endif
76 
77 // [alg.foreach]
78 // for_each_n with no policy
79 
80 template <class _ForwardIterator, class _Size, class _Function>
81 _ForwardIterator
82 __for_each_n_it_serial(_ForwardIterator __first, _Size __n, _Function __f)
83 {
84     for (; __n > 0; ++__first, --__n)
85         __f(__first);
86     return __first;
87 }
88 
89 //------------------------------------------------------------------------
90 // walk1 (pseudo)
91 //
92 // walk1 evaluates f(x) for each dereferenced value x drawn from [first,last)
93 //------------------------------------------------------------------------
94 template <class _ForwardIterator, class _Function>
95 void
96 __brick_walk1(_ForwardIterator __first, _ForwardIterator __last, _Function __f, /*vector=*/std::false_type) noexcept
97 {
98     std::for_each(__first, __last, __f);
99 }
100 
101 template <class _RandomAccessIterator, class _Function>
102 void
103 __brick_walk1(_RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f,
104               /*vector=*/std::true_type) noexcept
105 {
106     __unseq_backend::__simd_walk_1(__first, __last - __first, __f);
107 }
108 
109 template <class _ExecutionPolicy, class _ForwardIterator, class _Function, class _IsVector>
110 void
111 __pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f,
112                 _IsVector __is_vector,
113                 /*parallel=*/std::false_type) noexcept
114 {
115     __internal::__brick_walk1(__first, __last, __f, __is_vector);
116 }
117 
118 #if __PSTL_USE_PAR_POLICIES
119 template <class _ExecutionPolicy, class _ForwardIterator, class _Function, class _IsVector>
120 void
121 __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f,
122                 _IsVector __is_vector,
123                 /*parallel=*/std::true_type)
124 {
125     __internal::__except_handler([&]() {
126         __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last,
127                                       [__f, __is_vector](_ForwardIterator __i, _ForwardIterator __j) {
128                                           __internal::__brick_walk1(__i, __j, __f, __is_vector);
129                                       });
130     });
131 }
132 #endif
133 
134 template <class _ExecutionPolicy, class _ForwardIterator, class _Brick>
135 void
136 __pattern_walk_brick(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Brick __brick,
137                      /*parallel=*/std::false_type) noexcept
138 {
139     __brick(__first, __last);
140 }
141 
142 #if __PSTL_USE_PAR_POLICIES
143 template <class _ExecutionPolicy, class _ForwardIterator, class _Brick>
144 void
145 __pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Brick __brick,
146                      /*parallel=*/std::true_type)
147 {
148     __internal::__except_handler([&]() {
149         __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last,
150                                       [__brick](_ForwardIterator __i, _ForwardIterator __j) { __brick(__i, __j); });
151     });
152 }
153 #endif
154 
155 //------------------------------------------------------------------------
156 // walk1_n
157 //------------------------------------------------------------------------
158 template <class _ForwardIterator, class _Size, class _Function>
159 _ForwardIterator
160 __brick_walk1_n(_ForwardIterator __first, _Size __n, _Function __f, /*_IsVectorTag=*/std::false_type)
161 {
162     return __internal::__for_each_n_it_serial(__first, __n,
163                                   [&__f](_ForwardIterator __it) { __f(*__it); }); // calling serial version
164 }
165 
166 template <class _RandomAccessIterator, class _DifferenceType, class _Function>
167 _RandomAccessIterator
168 __brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __f,
169                 /*vectorTag=*/std::true_type) noexcept
170 {
171     return __unseq_backend::__simd_walk_1(__first, __n, __f);
172 }
173 
174 template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Function, class _IsVector>
175 _ForwardIterator
176 __pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f, _IsVector __is_vector,
177                   /*is_parallel=*/std::false_type) noexcept
178 {
179     return __internal::__brick_walk1_n(__first, __n, __f, __is_vector);
180 }
181 
182 #if __PSTL_USE_PAR_POLICIES
183 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Function, class _IsVector>
184 _RandomAccessIterator
185 __pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Function __f,
186                   _IsVector __is_vector,
187                   /*is_parallel=*/std::true_type)
188 {
189     __internal::__pattern_walk1(std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, __is_vector, std::true_type());
190     return __first + __n;
191 }
192 #endif
193 
194 template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Brick>
195 _ForwardIterator
196 __pattern_walk_brick_n(_ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Brick __brick,
197                        /*is_parallel=*/std::false_type) noexcept
198 {
199     return __brick(__first, __n);
200 }
201 
202 #if __PSTL_USE_PAR_POLICIES
203 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Brick>
204 _RandomAccessIterator
205 __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick,
206                        /*is_parallel=*/std::true_type)
207 {
208     return __internal::__except_handler([&]() {
209         __par_backend::__parallel_for(
210             std::forward<_ExecutionPolicy>(__exec), __first, __first + __n,
211             [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i); });
212         return __first + __n;
213     });
214 }
215 #endif
216 
217 //------------------------------------------------------------------------
218 // walk2 (pseudo)
219 //
220 // walk2 evaluates f(x,y) for deferenced values (x,y) drawn from [first1,last1) and [first2,...)
221 //------------------------------------------------------------------------
222 template <class _ForwardIterator1, class _ForwardIterator2, class _Function>
223 _ForwardIterator2
224 __brick_walk2(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f,
225               /*vector=*/std::false_type) noexcept
226 {
227     for (; __first1 != __last1; ++__first1, ++__first2)
228         __f(*__first1, *__first2);
229     return __first2;
230 }
231 
232 template <class _ForwardIterator1, class _ForwardIterator2, class _Function>
233 _ForwardIterator2
234 __brick_walk2(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f,
235               /*vector=*/std::true_type) noexcept
236 {
237     return __unseq_backend::__simd_walk_2(__first1, __last1 - __first1, __first2, __f);
238 }
239 
240 template <class _ForwardIterator1, class _Size, class _ForwardIterator2, class _Function>
241 _ForwardIterator2
242 __brick_walk2_n(_ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f,
243                 /*vector=*/std::false_type) noexcept
244 {
245     for (; __n > 0; --__n, ++__first1, ++__first2)
246         __f(*__first1, *__first2);
247     return __first2;
248 }
249 
250 template <class _ForwardIterator1, class _Size, class _ForwardIterator2, class _Function>
251 _ForwardIterator2
252 __brick_walk2_n(_ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f,
253                 /*vector=*/std::true_type) noexcept
254 {
255     return __unseq_backend::__simd_walk_2(__first1, __n, __first2, __f);
256 }
257 
258 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Function, class _IsVector>
259 _ForwardIterator2
260 __pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
261                 _Function __f, _IsVector __is_vector, /*parallel=*/std::false_type) noexcept
262 {
263     return __internal::__brick_walk2(__first1, __last1, __first2, __f, __is_vector);
264 }
265 
266 #if __PSTL_USE_PAR_POLICIES
267 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Function, class _IsVector>
268 _ForwardIterator2
269 __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
270                 _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/std::true_type)
271 {
272     return __internal::__except_handler([&]() {
273         __par_backend::__parallel_for(
274             std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
275             [__f, __first1, __first2, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) {
276                 __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector);
277             });
278         return __first2 + (__last1 - __first1);
279     });
280 }
281 #endif
282 
283 template <class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2, class _Function,
284           class _IsVector>
285 _ForwardIterator2
286 __pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f,
287                   _IsVector is_vector, /*parallel=*/std::false_type) noexcept
288 {
289     return __internal::__brick_walk2_n(__first1, __n, __first2, __f, is_vector);
290 }
291 
292 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _Size, class _RandomAccessIterator2,
293           class _Function, class _IsVector>
294 _RandomAccessIterator2
295 __pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2,
296                   _Function __f, _IsVector __is_vector, /*parallel=*/std::true_type)
297 {
298     return __internal::__pattern_walk2(std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f,
299                                        __is_vector, std::true_type());
300 }
301 
302 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Brick>
303 _ForwardIterator2
304 __pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
305                       _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/std::false_type) noexcept
306 {
307     return __brick(__first1, __last1, __first2);
308 }
309 
310 #if __PSTL_USE_PAR_POLICIES
311 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Brick>
312 _RandomAccessIterator2
313 __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
314                       _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/std::true_type)
315 {
316     return __internal::__except_handler([&]() {
317         __par_backend::__parallel_for(
318             std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
319             [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
320                 __brick(__i, __j, __first2 + (__i - __first1));
321             });
322         return __first2 + (__last1 - __first1);
323     });
324 }
325 #endif
326 
327 #if __PSTL_USE_PAR_POLICIES
328 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _Size, class _RandomAccessIterator2, class _Brick>
329 _RandomAccessIterator2
330 __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n,
331                         _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/std::true_type)
332 {
333     return __internal::__except_handler([&]() {
334         __par_backend::__parallel_for(
335             std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
336             [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
337                 __brick(__i, __j - __i, __first2 + (__i - __first1));
338             });
339         return __first2 + __n;
340     });
341 }
342 #endif
343 
344 template <class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2, class _Brick>
345 _ForwardIterator2
346 __pattern_walk2_brick_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2,
347                         _Brick __brick, /*parallel=*/std::false_type) noexcept
348 {
349     return __brick(__first1, __n, __first2);
350 }
351 
352 //------------------------------------------------------------------------
353 // walk3 (pseudo)
354 //
355 // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
356 //------------------------------------------------------------------------
357 template <class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator3, class _Function>
358 _ForwardIterator3
359 __brick_walk3(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
360               _ForwardIterator3 __first3, _Function __f, /*vector=*/std::false_type) noexcept
361 {
362     for (; __first1 != __last1; ++__first1, ++__first2, ++__first3)
363         __f(*__first1, *__first2, *__first3);
364     return __first3;
365 }
366 
367 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3, class _Function>
368 _RandomAccessIterator3
369 __brick_walk3(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
370               _RandomAccessIterator3 __first3, _Function __f, /*vector=*/std::true_type) noexcept
371 {
372     return __unseq_backend::__simd_walk_3(__first1, __last1 - __first1, __first2, __first3, __f);
373 }
374 
375 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator3,
376           class _Function, class _IsVector>
377 _ForwardIterator3
378 __pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
379                 _ForwardIterator3 __first3, _Function __f, _IsVector __is_vector, /*parallel=*/std::false_type) noexcept
380 {
381     return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, __is_vector);
382 }
383 
384 #if __PSTL_USE_PAR_POLICIES
385 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
386           class _RandomAccessIterator3, class _Function, class _IsVector>
387 _RandomAccessIterator3
388 __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
389                 _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector,
390                 /*parallel=*/std::true_type)
391 {
392     return __internal::__except_handler([&]() {
393         __par_backend::__parallel_for(
394             std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
395             [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
396                 __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, __is_vector);
397             });
398         return __first3 + (__last1 - __first1);
399     });
400 }
401 #endif
402 
403 //------------------------------------------------------------------------
404 // equal
405 //------------------------------------------------------------------------
406 
407 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
408 bool
409 __brick_equal(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
410               _ForwardIterator2 __last2, _BinaryPredicate __p, /* IsVector = */ std::false_type) noexcept
411 {
412     return std::equal(__first1, __last1, __first2, __last2, __p);
413 }
414 
415 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
416 bool
417 __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
418               _RandomAccessIterator2 __last2, _BinaryPredicate __p, /* is_vector = */ std::true_type) noexcept
419 {
420     if (__last1 - __first1 != __last2 - __first2)
421         return false;
422 
423     return __unseq_backend::__simd_first(__first1, __last1 - __first1, __first2,
424                                          __internal::__not_pred<_BinaryPredicate>(__p))
425                .first == __last1;
426 }
427 
428 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
429           class _IsVector>
430 bool
431 __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
432                 _ForwardIterator2 __last2, _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */
433                 std::false_type) noexcept
434 {
435     return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, __is_vector);
436 }
437 
438 #if _PSTL_USE_PAR_POLICIES
439 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate,
440           class _IsVector>
441 bool
442 __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
443                 _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p,
444                 _IsVector __is_vector, /*is_parallel=*/std::true_type)
445 {
446     if (__last1 - __first1 != __last2 - __first2)
447         return false;
448 
449     return __internal::__except_handler([&]() {
450         return !__internal::__parallel_or(
451             std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
452             [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
453                 return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1),
454                                                   __p, __is_vector);
455             });
456     });
457 }
458 #endif
459 
460 //------------------------------------------------------------------------
461 // equal version for sequences with equal length
462 //------------------------------------------------------------------------
463 
464 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
465 bool
466 __brick_equal(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p,
467               /* IsVector = */ std::false_type) noexcept
468 {
469     return std::equal(__first1, __last1, __first2, __p);
470 }
471 
472 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
473 bool
474 __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
475               _BinaryPredicate __p, /* is_vector = */ std::true_type) noexcept
476 {
477     return __unseq_backend::__simd_first(__first1, __last1 - __first1, __first2, __not_pred<_BinaryPredicate>(__p))
478                .first == __last1;
479 }
480 
481 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
482           class _IsVector>
483 bool
484 __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
485                 _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ std::false_type) noexcept
486 {
487     return __internal::__brick_equal(__first1, __last1, __first2, __p, __is_vector);
488 }
489 
490 #if __PSTL_USE_PAR_POLICIES
491 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate,
492           class _IsVector>
493 bool
494 __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
495                 _RandomAccessIterator2 __first2, _BinaryPredicate __p, _IsVector __is_vector,
496                 /*is_parallel=*/std::true_type)
497 {
498     return __internal::__except_handler([&]() {
499         return !__internal::__parallel_or(
500             std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
501             [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
502                 return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector);
503             });
504     });
505 }
506 #endif
507 
508 //------------------------------------------------------------------------
509 // find_if
510 //------------------------------------------------------------------------
511 template <class _ForwardIterator, class _Predicate>
512 _ForwardIterator
513 __brick_find_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
514                 /*is_vector=*/std::false_type) noexcept
515 {
516     return std::find_if(__first, __last, __pred);
517 }
518 
519 template <class _RandomAccessIterator, class _Predicate>
520 _RandomAccessIterator
521 __brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Predicate __pred,
522                 /*is_vector=*/std::true_type) noexcept
523 {
524     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType;
525     return __unseq_backend::__simd_first(
526         __first, _SizeType(0), __last - __first,
527         [&__pred](_RandomAccessIterator __it, _SizeType __i) { return __pred(__it[__i]); });
528 }
529 
530 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate, class _IsVector>
531 _ForwardIterator
532 __pattern_find_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
533                   _IsVector __is_vector,
534                   /*is_parallel=*/std::false_type) noexcept
535 {
536     return __internal::__brick_find_if(__first, __last, __pred, __is_vector);
537 }
538 
539 #if __PSTL_USE_PAR_POLICIES
540 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate, class _IsVector>
541 _ForwardIterator
542 __pattern_find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
543                   _IsVector __is_vector,
544                   /*is_parallel=*/std::true_type)
545 {
546     return __internal::__except_handler([&]() {
547 			return __internal::__parallel_find(std::forward<_ExecutionPolicy>(__exec), __first, __last,
548                                [__pred, __is_vector](_ForwardIterator __i, _ForwardIterator __j) {
549                                    return __internal::__brick_find_if(__i, __j, __pred, __is_vector);
550                                },
551                                std::less<typename std::iterator_traits<_ForwardIterator>::difference_type>(),
552                                /*is_first=*/true);
553     });
554 }
555 #endif
556 
557 //------------------------------------------------------------------------
558 // find_end
559 //------------------------------------------------------------------------
560 
561 // find the first occurrence of the subsequence [s_first, s_last)
562 //   or the  last occurrence of the subsequence in the range [first, last)
563 // b_first determines what occurrence we want to find (first or last)
564 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate, class _IsVector>
565 _RandomAccessIterator1
566 __find_subrange(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator1 __global_last,
567                 _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred,
568                 bool __b_first, _IsVector __is_vector) noexcept
569 {
570     typedef typename std::iterator_traits<_RandomAccessIterator2>::value_type _ValueType;
571     auto __n2 = __s_last - __s_first;
572     if (__n2 < 1)
573     {
574         return __b_first ? __first : __last;
575     }
576 
577     auto __n1 = __global_last - __first;
578     if (__n1 < __n2)
579     {
580         return __last;
581     }
582 
583     auto __cur = __last;
584     while (__first != __last && (__global_last - __first >= __n2))
585     {
586         // find position of *s_first in [first, last) (it can be start of subsequence)
587         __first = __internal::__brick_find_if(__first, __last,
588                                   __equal_value_by_pred<_ValueType, _BinaryPredicate>(*__s_first, __pred), __is_vector);
589 
590         // if position that was found previously is the start of subsequence
591         // then we can exit the loop (b_first == true) or keep the position
592         // (b_first == false)
593         if (__first != __last && (__global_last - __first >= __n2) &&
594             __internal::__brick_equal(__s_first + 1, __s_last, __first + 1, __pred, __is_vector))
595         {
596             if (__b_first)
597             {
598                 return __first;
599             }
600             else
601             {
602                 __cur = __first;
603             }
604         }
605         else if (__first == __last)
606         {
607             break;
608         }
609         else
610         {
611         }
612 
613         // in case of b_first == false we try to find new start position
614         // for the next subsequence
615         ++__first;
616     }
617     return __cur;
618 }
619 
620 template <class _RandomAccessIterator, class _Size, class _Tp, class _BinaryPredicate, class _IsVector>
621 _RandomAccessIterator
622 __find_subrange(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __global_last,
623                 _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector) noexcept
624 {
625     if (__global_last - __first < __count || __count < 1)
626     {
627         return __last; // According to the standard last shall be returned when count < 1
628     }
629 
630     auto __n = __global_last - __first;
631     auto __unary_pred = __equal_value_by_pred<_Tp, _BinaryPredicate>(__value, __pred);
632     while (__first != __last && (__global_last - __first >= __count))
633     {
634         __first = __internal::__brick_find_if(__first, __last, __unary_pred, __is_vector);
635 
636         // check that all of elements in [first+1, first+count) equal to value
637         if (__first != __last && (__global_last - __first >= __count) &&
638             !__internal::__brick_any_of(__first + 1, __first + __count, __not_pred<decltype(__unary_pred)>(__unary_pred),
639                             __is_vector))
640         {
641             return __first;
642         }
643         else if (__first == __last)
644         {
645             break;
646         }
647         else
648         {
649             ++__first;
650         }
651     }
652     return __last;
653 }
654 
655 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
656 _ForwardIterator1
657 __brick_find_end(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
658                  _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::false_type) noexcept
659 {
660     return std::find_end(__first, __last, __s_first, __s_last, __pred);
661 }
662 
663 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
664 _ForwardIterator1
665 __brick_find_end(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
666                  _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::true_type) noexcept
667 {
668     return __find_subrange(__first, __last, __last, __s_first, __s_last, __pred, false, std::true_type());
669 }
670 
671 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
672           class _IsVector>
673 _ForwardIterator1
674 __pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
675                    _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector,
676                    /*is_parallel=*/std::false_type) noexcept
677 {
678     return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, __is_vector);
679 }
680 
681 #if __PSTL_USE_PAR_POLICIES
682 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
683           class _IsVector>
684 _ForwardIterator1
685 __pattern_find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
686                    _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred,
687                    _IsVector __is_vector, /*is_parallel=*/std::true_type) noexcept
688 {
689     if (__last - __first == __s_last - __s_first)
690     {
691         const bool __res = __internal::__pattern_equal(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred,
692                                            __is_vector, std::true_type());
693         return __res ? __first : __last;
694     }
695     else
696     {
697         return __internal::__except_handler([&]() {
698            return __internal::__parallel_find(
699                 std::forward<_ExecutionPolicy>(__exec), __first, __last,
700                 [__last, __s_first, __s_last, __pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) {
701                     return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, __is_vector);
702                 },
703                 std::greater<typename std::iterator_traits<_ForwardIterator1>::difference_type>(), /*is_first=*/false);
704         });
705     }
706 }
707 #endif
708 
709 //------------------------------------------------------------------------
710 // find_first_of
711 //------------------------------------------------------------------------
712 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
713 _ForwardIterator1
714 __brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
715                       _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::false_type) noexcept
716 {
717     return std::find_first_of(__first, __last, __s_first, __s_last, __pred);
718 }
719 
720 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
721 _ForwardIterator1
722 __brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
723                       _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::true_type) noexcept
724 {
725     return __unseq_backend::__simd_find_first_of(__first, __last, __s_first, __s_last, __pred);
726 }
727 
728 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
729           class _IsVector>
730 _ForwardIterator1
731 __pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last,
732                         _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred,
733                         _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
734 {
735     return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, __is_vector);
736 }
737 
738 #if __PSTL_USE_PAR_POLICIES
739 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
740           class _IsVector>
741 _ForwardIterator1
742 __pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
743                         _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred,
744                         _IsVector __is_vector, /*is_parallel=*/std::true_type) noexcept
745 {
746     return __internal::__except_handler([&]() {
747          return __internal::__parallel_find(
748             std::forward<_ExecutionPolicy>(__exec), __first, __last,
749             [__s_first, __s_last, __pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) {
750                 return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector);
751             },
752             std::less<typename std::iterator_traits<_ForwardIterator1>::difference_type>(), /*is_first=*/true);
753     });
754 }
755 #endif
756 
757 //------------------------------------------------------------------------
758 // search
759 //------------------------------------------------------------------------
760 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
761 _ForwardIterator1
762 __brick_search(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
763                _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept
764 {
765     return std::search(__first, __last, __s_first, __s_last, __pred);
766 }
767 
768 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
769 _ForwardIterator1
770 __brick_search(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
771                _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
772 {
773     return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, true, std::true_type());
774 }
775 
776 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
777           class _IsVector>
778 _ForwardIterator1
779 __pattern_search(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
780                  _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector,
781                  /*is_parallel=*/std::false_type) noexcept
782 {
783     return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, __is_vector);
784 }
785 
786 #if __PSTL_USE_PAR_POLICIES
787 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate,
788           class _IsVector>
789 _ForwardIterator1
790 __pattern_search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
791                  _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred,
792                  _IsVector __is_vector,
793                  /*is_parallel=*/std::true_type) noexcept
794 {
795     if (__last - __first == __s_last - __s_first)
796     {
797         const bool __res = __internal::__pattern_equal(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred,
798                                            __is_vector, std::true_type());
799         return __res ? __first : __last;
800     }
801     else
802     {
803         return __internal::__except_handler([&]() {
804             return __internal::__parallel_find(
805                 std::forward<_ExecutionPolicy>(__exec), __first, __last,
806                 [__last, __s_first, __s_last, __pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) {
807                     return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, __is_vector);
808                 },
809                 std::less<typename std::iterator_traits<_ForwardIterator1>::difference_type>(), /*is_first=*/true);
810         });
811     }
812 }
813 #endif
814 
815 //------------------------------------------------------------------------
816 // search_n
817 //------------------------------------------------------------------------
818 template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
819 _ForwardIterator
820 __brick_search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value,
821                  _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept
822 {
823     return std::search_n(__first, __last, __count, __value, __pred);
824 }
825 
826 template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
827 _ForwardIterator
828 __brick_search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value,
829                  _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
830 {
831     return __internal::__find_subrange(__first, __last, __last, __count, __value, __pred, std::true_type());
832 }
833 
834 template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate,
835           class _IsVector>
836 _ForwardIterator
837 __pattern_search_n(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
838                    const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector,
839                    /*is_parallel=*/std::false_type) noexcept
840 {
841     return __internal::__brick_search_n(__first, __last, __count, __value, __pred, __is_vector);
842 }
843 
844 #if __PSTL_USE_PAR_POLICIES
845 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Tp, class _BinaryPredicate,
846           class _IsVector>
847 _RandomAccessIterator
848 __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
849                    _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector,
850                    /*is_parallel=*/std::true_type) noexcept
851 {
852     if (__last - __first == __count)
853     {
854         const bool __result =
855             !__internal::__pattern_any_of(std::forward<_ExecutionPolicy>(__exec), __first, __last,
856                               [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }, __is_vector,
857                               /*is_parallel*/ std::true_type());
858         return __result ? __first : __last;
859     }
860     else
861     {
862         return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() {
863             return __internal::__parallel_find(
864                 std::forward<_ExecutionPolicy>(__exec), __first, __last,
865                 [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) {
866                     return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector);
867                 },
868                 std::less<typename std::iterator_traits<_RandomAccessIterator>::difference_type>(), /*is_first=*/true);
869         });
870     }
871 }
872 #endif
873 
874 //------------------------------------------------------------------------
875 // copy_n
876 //------------------------------------------------------------------------
877 
878 template <class _ForwardIterator, class _Size, class _OutputIterator>
879 _OutputIterator
880 __brick_copy_n(_ForwardIterator __first, _Size __n, _OutputIterator __result, /*vector=*/std::false_type) noexcept
881 {
882     return std::copy_n(__first, __n, __result);
883 }
884 
885 template <class _ForwardIterator, class _Size, class _OutputIterator>
886 _OutputIterator
887 __brick_copy_n(_ForwardIterator __first, _Size __n, _OutputIterator __result, /*vector=*/std::true_type) noexcept
888 {
889     return __unseq_backend::__simd_assign(
890         __first, __n, __result, [](_ForwardIterator __first, _OutputIterator __result) { *__result = *__first; });
891 }
892 
893 //------------------------------------------------------------------------
894 // copy
895 //------------------------------------------------------------------------
896 template <class _ForwardIterator, class _OutputIterator>
897 _OutputIterator
898 __brick_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
899              /*vector=*/std::false_type) noexcept
900 {
901     return std::copy(__first, __last, __result);
902 }
903 
904 template <class _RandomAccessIterator, class _OutputIterator>
905 _OutputIterator
906 __brick_copy(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
907              /*vector=*/std::true_type) noexcept
908 {
909     return __unseq_backend::__simd_assign(
910         __first, __last - __first, __result,
911         [](_RandomAccessIterator __first, _OutputIterator __result) { *__result = *__first; });
912 }
913 
914 //------------------------------------------------------------------------
915 // move
916 //------------------------------------------------------------------------
917 template <class _ForwardIterator, class _OutputIterator>
918 _OutputIterator
919 __brick_move(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
920              /*vector=*/std::false_type) noexcept
921 {
922     return std::move(__first, __last, __result);
923 }
924 
925 template <class _RandomAccessIterator, class _OutputIterator>
926 _OutputIterator
927 __brick_move(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
928              /*vector=*/std::true_type) noexcept
929 {
930     return __unseq_backend::__simd_assign(
931         __first, __last - __first, __result,
932         [](_RandomAccessIterator __first, _OutputIterator __result) { *__result = std::move(*__first); });
933 }
934 
935 //------------------------------------------------------------------------
936 // swap_ranges
937 //------------------------------------------------------------------------
938 template <class _ForwardIterator, class _OutputIterator>
939 _OutputIterator
940 __brick_swap_ranges(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
941                     /*vector=*/std::false_type) noexcept
942 {
943     return std::swap_ranges(__first, __last, __result);
944 }
945 
946 template <class _ForwardIterator, class _OutputIterator>
947 _OutputIterator
948 __brick_swap_ranges(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
949                     /*vector=*/std::true_type) noexcept
950 {
951     using std::iter_swap;
952     return __unseq_backend::__simd_assign(__first, __last - __first, __result,
953                                           iter_swap<_ForwardIterator, _OutputIterator>);
954 }
955 
956 //------------------------------------------------------------------------
957 // copy_if
958 //------------------------------------------------------------------------
959 template <class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
960 _OutputIterator
961 __brick_copy_if(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _UnaryPredicate __pred,
962                 /*vector=*/std::false_type) noexcept
963 {
964     return std::copy_if(__first, __last, __result, __pred);
965 }
966 
967 template <class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
968 _OutputIterator
969 __brick_copy_if(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _UnaryPredicate __pred,
970                 /*vector=*/std::true_type) noexcept
971 {
972 #if (__PSTL_MONOTONIC_PRESENT)
973     return __unseq_backend::__simd_copy_if(__first, __last - __first, __result, __pred);
974 #else
975     return std::copy_if(__first, __last, __result, __pred);
976 #endif
977 }
978 
979 // TODO: Try to use transform_reduce for combining __brick_copy_if_phase1 on IsVector.
980 template <class _DifferenceType, class _ForwardIterator, class _UnaryPredicate>
981 std::pair<_DifferenceType, _DifferenceType>
982 __brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, bool* __restrict __mask, _UnaryPredicate __pred,
983                     /*vector=*/std::false_type) noexcept
984 {
985     auto __count_true = _DifferenceType(0);
986     auto __size = __last - __first;
987 
988     static_assert(__is_random_access_iterator<_ForwardIterator>::value,
989                   "Pattern-brick error. Should be a random access iterator.");
990 
991     for (; __first != __last; ++__first, ++__mask)
992     {
993         *__mask = __pred(*__first);
994         if (*__mask)
995         {
996             ++__count_true;
997         }
998     }
999     return std::make_pair(__count_true, __size - __count_true);
1000 }
1001 
1002 template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
1003 std::pair<_DifferenceType, _DifferenceType>
1004 __brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __mask, _UnaryPredicate __pred,
1005                     /*vector=*/std::true_type) noexcept
1006 {
1007     auto __result = __unseq_backend::__simd_calc_mask_1(__first, __last - __first, __mask, __pred);
1008     return std::make_pair(__result, (__last - __first) - __result);
1009 }
1010 
1011 template <class _ForwardIterator, class _OutputIterator, class _Assigner>
1012 void
1013 __brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, bool* __mask,
1014                      _Assigner __assigner, /*vector=*/std::false_type) noexcept
1015 {
1016     for (; __first != __last; ++__first, ++__mask)
1017     {
1018         if (*__mask)
1019         {
1020             __assigner(__first, __result);
1021             ++__result;
1022         }
1023     }
1024 }
1025 
1026 template <class _ForwardIterator, class _OutputIterator, class _Assigner>
1027 void
1028 __brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
1029                      bool* __restrict __mask, _Assigner __assigner, /*vector=*/std::true_type) noexcept
1030 {
1031 #if (__PSTL_MONOTONIC_PRESENT)
1032     __unseq_backend::__simd_copy_by_mask(__first, __last - __first, __result, __mask, __assigner);
1033 #else
1034     __internal::__brick_copy_by_mask(__first, __last, __result, __mask, __assigner, std::false_type());
1035 #endif
1036 }
1037 
1038 template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2>
1039 void
1040 __brick_partition_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true,
1041                           _OutputIterator2 __out_false, bool* __mask, /*vector=*/std::false_type) noexcept
1042 {
1043     for (; __first != __last; ++__first, ++__mask)
1044     {
1045         if (*__mask)
1046         {
1047             *__out_true = *__first;
1048             ++__out_true;
1049         }
1050         else
1051         {
1052             *__out_false = *__first;
1053             ++__out_false;
1054         }
1055     }
1056 }
1057 
1058 template <class _RandomAccessIterator, class _OutputIterator1, class _OutputIterator2>
1059 void
1060 __brick_partition_by_mask(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator1 __out_true,
1061                           _OutputIterator2 __out_false, bool* __mask, /*vector=*/std::true_type) noexcept
1062 {
1063 #if (__PSTL_MONOTONIC_PRESENT)
1064     __unseq_backend::__simd_partition_by_mask(__first, __last - __first, __out_true, __out_false, __mask);
1065 #else
1066     __internal::__brick_partition_by_mask(__first, __last, __out_true, __out_false, __mask, std::false_type());
1067 #endif
1068 }
1069 
1070 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate, class _IsVector>
1071 _OutputIterator
1072 __pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
1073                   _UnaryPredicate __pred, _IsVector __is_vector, /*parallel=*/std::false_type) noexcept
1074 {
1075     return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector);
1076 }
1077 
1078 #if __PSTL_USE_PAR_POLICIES
1079 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryPredicate,
1080           class _IsVector>
1081 _OutputIterator
1082 __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
1083                   _OutputIterator __result, _UnaryPredicate __pred, _IsVector __is_vector, /*parallel=*/std::true_type)
1084 {
1085     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
1086     const _DifferenceType __n = __last - __first;
1087     if (_DifferenceType(1) < __n)
1088     {
1089         __par_backend::__buffer<bool> __mask_buf(__n);
1090         return __internal::__except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() {
1091             bool* __mask = __mask_buf.get();
1092             _DifferenceType __m{};
1093             __par_backend::__parallel_strict_scan(
1094                 std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
1095                 [=](_DifferenceType __i, _DifferenceType __len) { // Reduce
1096                     return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i,
1097                                                                 __pred, __is_vector)
1098                         .first;
1099                 },
1100                 std::plus<_DifferenceType>(),                                                // Combine
1101                 [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
1102                     __internal::__brick_copy_by_mask(__first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
1103                                          [](_RandomAccessIterator __x, _OutputIterator __z) { *__z = *__x; },
1104                                          __is_vector);
1105                 },
1106                 [&__m](_DifferenceType __total) { __m = __total; });
1107             return __result + __m;
1108         });
1109     }
1110     // trivial sequence - use serial algorithm
1111     return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector);
1112 }
1113 #endif
1114 
1115 //------------------------------------------------------------------------
1116 // count
1117 //------------------------------------------------------------------------
1118 template <class _ForwardIterator, class _Predicate>
1119 typename std::iterator_traits<_ForwardIterator>::difference_type
1120 __brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
1121               /* is_vector = */ std::true_type) noexcept
1122 {
1123     return __unseq_backend::__simd_count(__first, __last - __first, __pred);
1124 }
1125 
1126 template <class _ForwardIterator, class _Predicate>
1127 typename std::iterator_traits<_ForwardIterator>::difference_type
1128 __brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
1129               /* is_vector = */ std::false_type) noexcept
1130 {
1131     return std::count_if(__first, __last, __pred);
1132 }
1133 
1134 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate, class _IsVector>
1135 typename std::iterator_traits<_ForwardIterator>::difference_type
1136 __pattern_count(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
1137                 /* is_parallel */ std::false_type, _IsVector __is_vector) noexcept
1138 {
1139     return __internal::__brick_count(__first, __last, __pred, __is_vector);
1140 }
1141 
1142 #if __PSTL_USE_PAR_POLICIES
1143 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate, class _IsVector>
1144 typename std::iterator_traits<_ForwardIterator>::difference_type
1145 __pattern_count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
1146                 /* is_parallel */ std::true_type, _IsVector __is_vector)
1147 {
1148     typedef typename std::iterator_traits<_ForwardIterator>::difference_type _SizeType;
1149     return __internal::__except_handler([&]() {
1150         return __par_backend::__parallel_reduce(
1151             std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0),
1152             [__pred, __is_vector](_ForwardIterator __begin, _ForwardIterator __end, _SizeType __value) -> _SizeType {
1153                 return __value + __internal::__brick_count(__begin, __end, __pred, __is_vector);
1154             },
1155             std::plus<_SizeType>());
1156     });
1157 }
1158 #endif
1159 
1160 //------------------------------------------------------------------------
1161 // unique
1162 //------------------------------------------------------------------------
1163 
1164 template <class _ForwardIterator, class _BinaryPredicate>
1165 _ForwardIterator
1166 __brick_unique(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
1167                /*is_vector=*/std::false_type) noexcept
1168 {
1169     return std::unique(__first, __last, __pred);
1170 }
1171 
1172 template <class _ForwardIterator, class _BinaryPredicate>
1173 _ForwardIterator
1174 __brick_unique(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
1175                /*is_vector=*/std::true_type) noexcept
1176 {
1177     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
1178     return std::unique(__first, __last, __pred);
1179 }
1180 
1181 template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate, class _IsVector>
1182 _ForwardIterator
1183 __pattern_unique(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
1184                  _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
1185 {
1186     return __internal::__brick_unique(__first, __last, __pred, __is_vector);
1187 }
1188 
1189 #if __PSTL_USE_PAR_POLICIES
1190 // That function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (pattern unique). But a mask calculation is different.
1191 // So, a caller passes _CalcMask brick into remove_elements.
1192 template <class _ExecutionPolicy, class _ForwardIterator, class _CalcMask, class _IsVector>
1193 _ForwardIterator
1194 __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _CalcMask __calc_mask,
1195                 _IsVector __is_vector)
1196 {
1197     typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType;
1198     typedef typename std::iterator_traits<_ForwardIterator>::value_type _Tp;
1199     _DifferenceType __n = __last - __first;
1200     __par_backend::__buffer<bool> __mask_buf(__n);
1201     // 1. find a first iterator that should be removed
1202     return __internal::__except_handler([&]() {
1203         bool* __mask = __mask_buf.get();
1204         _DifferenceType __min = __par_backend::__parallel_reduce(
1205             std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n,
1206             [__first, __mask, &__calc_mask, __is_vector](_DifferenceType __i, _DifferenceType __j,
1207                                                          _DifferenceType __local_min) -> _DifferenceType {
1208                 // Create mask
1209                 __calc_mask(__mask + __i, __mask + __j, __first + __i);
1210 
1211                 // if minimum was found in a previous range we shouldn't do anymore
1212                 if (__local_min < __i)
1213                 {
1214                     return __local_min;
1215                 }
1216                 // find first iterator that should be removed
1217                 bool* __result =
1218                     __internal::__brick_find_if(__mask + __i, __mask + __j, [](bool __val) { return !__val; }, __is_vector);
1219                 if (__result - __mask == __j)
1220                 {
1221                     return __local_min;
1222                 }
1223                 return std::min(__local_min, _DifferenceType(__result - __mask));
1224             },
1225             [](_DifferenceType __local_min1, _DifferenceType __local_min2) -> _DifferenceType {
1226                 return std::min(__local_min1, __local_min2);
1227             });
1228 
1229         // No elements to remove - exit
1230         if (__min == __n)
1231         {
1232             return __last;
1233         }
1234         __n -= __min;
1235         __first += __min;
1236 
1237         __par_backend::__buffer<_Tp> __buf(__n);
1238         _Tp* __result = __buf.get();
1239         __mask += __min;
1240         _DifferenceType __m{};
1241         // 2. Elements that doesn't satisfy pred are moved to result
1242         __par_backend::__parallel_strict_scan(
1243             std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
1244             [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) {
1245                 return __internal::__brick_count(__mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, __is_vector);
1246             },
1247             std::plus<_DifferenceType>(),
1248             [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) {
1249                 __internal::__brick_copy_by_mask(__first + __i, __first + __i + __len, __result + __initial, __mask + __i,
1250                                      [](_ForwardIterator __x, _Tp* __z) {
1251                                          __internal::__invoke_if_else(std::is_trivial<_Tp>(), [&]() { *__z = std::move(*__x); },
1252                                                           [&]() { ::new (std::addressof(*__z)) _Tp(std::move(*__x)); });
1253                                      },
1254                                      __is_vector);
1255             },
1256             [&__m](_DifferenceType __total) { __m = __total; });
1257 
1258         // 3. Elements from result are moved to [first, last)
1259         __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + __m,
1260                                       [__result, __first, __is_vector](_Tp* __i, _Tp* __j) {
1261                                           __internal::__brick_move(__i, __j, __first + (__i - __result), __is_vector);
1262                                       });
1263         return __first + __m;
1264     });
1265 }
1266 #endif
1267 
1268 #if __PSTL_USE_PAR_POLICIES
1269 template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate, class _IsVector>
1270 _ForwardIterator
1271 __pattern_unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
1272                  _IsVector __is_vector, /*is_parallel=*/std::true_type) noexcept
1273 {
1274     typedef typename std::iterator_traits<_ForwardIterator>::reference _ReferenceType;
1275 
1276     if (__first == __last)
1277     {
1278         return __last;
1279     }
1280     if (__first + 1 == __last || __first + 2 == __last)
1281     {
1282         // Trivial sequence - use serial algorithm
1283         return __internal::__brick_unique(__first, __last, __pred, __is_vector);
1284     }
1285     return __internal::__remove_elements(
1286         std::forward<_ExecutionPolicy>(__exec), ++__first, __last,
1287         [&__pred, __is_vector](bool* __b, bool* __e, _ForwardIterator __it) {
1288             __internal::__brick_walk3(__b, __e, __it - 1, __it,
1289                           [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); },
1290                           __is_vector);
1291         },
1292         __is_vector);
1293 }
1294 #endif
1295 
1296 //------------------------------------------------------------------------
1297 // unique_copy
1298 //------------------------------------------------------------------------
1299 
1300 template <class _ForwardIterator, class OutputIterator, class _BinaryPredicate>
1301 OutputIterator
1302 __brick_unique_copy(_ForwardIterator __first, _ForwardIterator __last, OutputIterator __result, _BinaryPredicate __pred,
1303                     /*vector=*/std::false_type) noexcept
1304 {
1305     return std::unique_copy(__first, __last, __result, __pred);
1306 }
1307 
1308 template <class _RandomAccessIterator, class OutputIterator, class _BinaryPredicate>
1309 OutputIterator
1310 __brick_unique_copy(_RandomAccessIterator __first, _RandomAccessIterator __last, OutputIterator __result,
1311                     _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
1312 {
1313 #if (__PSTL_MONOTONIC_PRESENT)
1314     return __unseq_backend::__simd_unique_copy(__first, __last - __first, __result, __pred);
1315 #else
1316     return std::unique_copy(__first, __last, __result, __pred);
1317 #endif
1318 }
1319 
1320 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryPredicate,
1321           class _IsVector>
1322 _OutputIterator
1323 __pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
1324                       _BinaryPredicate __pred, _IsVector __is_vector, /*parallel=*/std::false_type) noexcept
1325 {
1326     return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector);
1327 }
1328 
1329 template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
1330 _DifferenceType
1331 __brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
1332                     _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept
1333 {
1334     _DifferenceType __count = 0;
1335     for (; __first != __last; ++__first, ++__mask)
1336     {
1337         *__mask = !__pred(*__first, *(__first - 1));
1338         __count += *__mask;
1339     }
1340     return __count;
1341 }
1342 
1343 template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
1344 _DifferenceType
1345 __brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
1346                     _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
1347 {
1348     return __unseq_backend::__simd_calc_mask_2(__first, __last - __first, __mask, __pred);
1349 }
1350 
1351 #if __PSTL_USE_PAR_POLICIES
1352 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _BinaryPredicate,
1353           class _IsVector>
1354 _OutputIterator
1355 __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
1356                       _OutputIterator __result, _BinaryPredicate __pred, _IsVector __is_vector,
1357                       /*parallel=*/std::true_type)
1358 {
1359     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
1360     const _DifferenceType __n = __last - __first;
1361     if (_DifferenceType(2) < __n)
1362     {
1363         __par_backend::__buffer<bool> __mask_buf(__n);
1364         if (_DifferenceType(2) < __n)
1365         {
1366             return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() {
1367                 bool* __mask = __mask_buf.get();
1368                 _DifferenceType __m{};
1369                 __par_backend::__parallel_strict_scan(
1370                     std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
1371                     [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce
1372                         _DifferenceType __extra = 0;
1373                         if (__i == 0)
1374                         {
1375                             // Special boundary case
1376                             __mask[__i] = true;
1377                             if (--__len == 0)
1378                                 return 1;
1379                             ++__i;
1380                             ++__extra;
1381                         }
1382                         return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len),
1383                                                                     __mask + __i, __pred, __is_vector) +
1384                                __extra;
1385                     },
1386                     std::plus<_DifferenceType>(),                                                // Combine
1387                     [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
1388                         // Phase 2 is same as for __pattern_copy_if
1389                         __internal::__brick_copy_by_mask(__first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
1390                                              [](_RandomAccessIterator __x, _OutputIterator __z) { *__z = *__x; },
1391                                              __is_vector);
1392                     },
1393                     [&__m](_DifferenceType __total) { __m = __total; });
1394                 return __result + __m;
1395             });
1396         }
1397     }
1398     // trivial sequence - use serial algorithm
1399     return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector);
1400 }
1401 #endif
1402 
1403 //------------------------------------------------------------------------
1404 // reverse
1405 //------------------------------------------------------------------------
1406 template <class _BidirectionalIterator>
1407 void
1408 __brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, /*__is_vector=*/std::false_type) noexcept
1409 {
1410     std::reverse(__first, __last);
1411 }
1412 
1413 template <class _BidirectionalIterator>
1414 void
1415 __brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, /*__is_vector=*/std::true_type) noexcept
1416 {
1417     typedef typename std::iterator_traits<_BidirectionalIterator>::reference _ReferenceType;
1418 
1419     const auto __n = (__last - __first) / 2;
1420     __unseq_backend::__simd_walk_2(__first, __n, std::reverse_iterator<_BidirectionalIterator>(__last),
1421                                    [](_ReferenceType __x, _ReferenceType __y) {
1422                                        using std::swap;
1423                                        swap(__x, __y);
1424                                    });
1425 }
1426 
1427 // this brick is called in parallel version, so we can use iterator arithmetic
1428 template <class _BidirectionalIterator>
1429 void
1430 __brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, _BidirectionalIterator __d_last,
1431                 /*is_vector=*/std::false_type) noexcept
1432 {
1433     for (--__d_last; __first != __last; ++__first, --__d_last)
1434     {
1435         using std::iter_swap;
1436         iter_swap(__first, __d_last);
1437     }
1438 }
1439 
1440 // this brick is called in parallel version, so we can use iterator arithmetic
1441 template <class _BidirectionalIterator>
1442 void
1443 __brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, _BidirectionalIterator __d_last,
1444                 /*is_vector=*/std::true_type) noexcept
1445 {
1446     typedef typename std::iterator_traits<_BidirectionalIterator>::reference _ReferenceType;
1447 
1448     __unseq_backend::__simd_walk_2(__first, __last - __first, std::reverse_iterator<_BidirectionalIterator>(__d_last),
1449                                    [](_ReferenceType __x, _ReferenceType __y) {
1450                                        using std::swap;
1451                                        swap(__x, __y);
1452                                    });
1453 }
1454 
1455 template <class _ExecutionPolicy, class _BidirectionalIterator, class _IsVector>
1456 void
1457 __pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last,
1458                   _IsVector _is_vector,
1459                   /*is_parallel=*/std::false_type) noexcept
1460 {
1461     __internal::__brick_reverse(__first, __last, _is_vector);
1462 }
1463 
1464 #if __PSTL_USE_PAR_POLICIES
1465 template <class _ExecutionPolicy, class _BidirectionalIterator, class _IsVector>
1466 void
1467 __pattern_reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
1468                   _IsVector __is_vector, /*is_parallel=*/std::true_type)
1469 {
1470     __par_backend::__parallel_for(
1471         std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2,
1472         [__is_vector, __first, __last](_BidirectionalIterator __inner_first, _BidirectionalIterator __inner_last) {
1473             __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), __is_vector);
1474         });
1475 }
1476 #endif
1477 
1478 //------------------------------------------------------------------------
1479 // reverse_copy
1480 //------------------------------------------------------------------------
1481 
1482 template <class _BidirectionalIterator, class _OutputIterator>
1483 _OutputIterator
1484 __brick_reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _OutputIterator __d_first,
1485                      /*is_vector=*/std::false_type) noexcept
1486 {
1487     return std::reverse_copy(__first, __last, __d_first);
1488 }
1489 
1490 template <class _BidirectionalIterator, class _OutputIterator>
1491 _OutputIterator
1492 __brick_reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _OutputIterator __d_first,
1493                      /*is_vector=*/std::true_type) noexcept
1494 {
1495     typedef typename std::iterator_traits<_BidirectionalIterator>::reference _ReferenceType1;
1496     typedef typename std::iterator_traits<_OutputIterator>::reference _ReferenceType2;
1497 
1498     return __unseq_backend::__simd_walk_2(std::reverse_iterator<_BidirectionalIterator>(__last), __last - __first,
1499                                           __d_first, [](_ReferenceType1 __x, _ReferenceType2 __y) { __y = __x; });
1500 }
1501 
1502 template <class _ExecutionPolicy, class _BidirectionalIterator, class _OutputIterator, class _IsVector>
1503 _OutputIterator
1504 __pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last,
1505                        _OutputIterator __d_first, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
1506 {
1507     return __internal::__brick_reverse_copy(__first, __last, __d_first, __is_vector);
1508 }
1509 
1510 #if __PSTL_USE_PAR_POLICIES
1511 template <class _ExecutionPolicy, class _BidirectionalIterator, class _OutputIterator, class _IsVector>
1512 _OutputIterator
1513 __pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
1514                        _OutputIterator __d_first, _IsVector __is_vector, /*is_parallel=*/std::true_type)
1515 {
1516     auto __len = __last - __first;
1517     __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last,
1518                                   [__is_vector, __first, __len, __d_first](_BidirectionalIterator __inner_first,
1519                                                                            _BidirectionalIterator __inner_last) {
1520                                       __internal::__brick_reverse_copy(__inner_first, __inner_last,
1521                                                            __d_first + (__len - (__inner_last - __first)), __is_vector);
1522                                   });
1523     return __d_first + __len;
1524 }
1525 #endif
1526 
1527 //------------------------------------------------------------------------
1528 // rotate
1529 //------------------------------------------------------------------------
1530 template <class _ForwardIterator>
1531 _ForwardIterator
1532 __brick_rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
1533                /*is_vector=*/std::false_type) noexcept
1534 {
1535 #if __PSTL_CPP11_STD_ROTATE_BROKEN
1536     std::rotate(__first, __middle, __last);
1537     return std::next(__first, std::distance(__middle, __last));
1538 #else
1539     return std::rotate(__first, __middle, __last);
1540 #endif
1541 }
1542 
1543 template <class _ForwardIterator>
1544 _ForwardIterator
1545 __brick_rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
1546                /*is_vector=*/std::true_type) noexcept
1547 {
1548     auto __n = __last - __first;
1549     auto __m = __middle - __first;
1550     const _ForwardIterator __ret = __first + (__last - __middle);
1551 
1552     bool __is_left = (__m <= __n / 2);
1553     if (!__is_left)
1554         __m = __n - __m;
1555 
1556     while (__n > 1 && __m > 0)
1557     {
1558         using std::iter_swap;
1559         const auto __m_2 = __m * 2;
1560         if (__is_left)
1561         {
1562             for (; __last - __first >= __m_2; __first += __m)
1563             {
1564                 __unseq_backend::__simd_assign(__first, __m, __first + __m,
1565                                                iter_swap<_ForwardIterator, _ForwardIterator>);
1566             }
1567         }
1568         else
1569         {
1570             for (; __last - __first >= __m_2; __last -= __m)
1571             {
1572                 __unseq_backend::__simd_assign(__last - __m, __m, __last - __m_2,
1573                                                iter_swap<_ForwardIterator, _ForwardIterator>);
1574             }
1575         }
1576         __is_left = !__is_left;
1577         __m = __n % __m;
1578         __n = __last - __first;
1579     }
1580 
1581     return __ret;
1582 }
1583 
1584 template <class _ExecutionPolicy, class _ForwardIterator, class _IsVector>
1585 _ForwardIterator
1586 __pattern_rotate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
1587                  _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
1588 {
1589     return __internal::__brick_rotate(__first, __middle, __last, __is_vector);
1590 }
1591 
1592 #if __PSTL_USE_PAR_POLICIES
1593 template <class _ExecutionPolicy, class _ForwardIterator, class _IsVector>
1594 _ForwardIterator
1595 __pattern_rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle,
1596                  _ForwardIterator __last, _IsVector __is_vector, /*is_parallel=*/std::true_type)
1597 {
1598     typedef typename std::iterator_traits<_ForwardIterator>::value_type _Tp;
1599     auto __n = __last - __first;
1600     auto __m = __middle - __first;
1601     if (__m <= __n / 2)
1602     {
1603         __par_backend::__buffer<_Tp> __buf(__n - __m);
1604         return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() {
1605             _Tp* __result = __buf.get();
1606             __par_backend::__parallel_for(
1607                 std::forward<_ExecutionPolicy>(__exec), __middle, __last,
1608                 [__middle, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) {
1609                     __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), __is_vector);
1610                 });
1611 
1612             __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __middle,
1613                                           [__last, __middle, __is_vector](_ForwardIterator __b, _ForwardIterator __e) {
1614                                               __internal::__brick_move(__b, __e, __b + (__last - __middle), __is_vector);
1615                                           });
1616 
1617             __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m),
1618                                           [__first, __result, __is_vector](_Tp* __b, _Tp* __e) {
1619                                               __internal::__brick_move(__b, __e, __first + (__b - __result), __is_vector);
1620                                           });
1621 
1622             return __first + (__last - __middle);
1623         });
1624     }
1625     else
1626     {
1627         __par_backend::__buffer<_Tp> __buf(__m);
1628         return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() {
1629             _Tp* __result = __buf.get();
1630             __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __middle,
1631                                           [__first, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) {
1632                                               __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first),
1633                                                                          __is_vector);
1634                                           });
1635 
1636             __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __middle, __last,
1637                                           [__first, __middle, __is_vector](_ForwardIterator __b, _ForwardIterator __e) {
1638                                               __internal::__brick_move(__b, __e, __first + (__b - __middle), __is_vector);
1639                                           });
1640 
1641             __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + __m,
1642                                           [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) {
1643                                               __internal::__brick_move(__b, __e, __first + ((__n - __m) + (__b - __result)),
1644                                                            __is_vector);
1645                                           });
1646 
1647             return __first + (__last - __middle);
1648         });
1649     }
1650 }
1651 #endif
1652 
1653 //------------------------------------------------------------------------
1654 // rotate_copy
1655 //------------------------------------------------------------------------
1656 
1657 template <class _ForwardIterator, class _OutputIterator>
1658 _OutputIterator
1659 __brick_rotate_copy(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
1660                     _OutputIterator __result, /*__is_vector=*/std::false_type) noexcept
1661 {
1662     return std::rotate_copy(__first, __middle, __last, __result);
1663 }
1664 
1665 template <class _ForwardIterator, class _OutputIterator>
1666 _OutputIterator
1667 __brick_rotate_copy(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
1668                     _OutputIterator __result, /*__is_vector=*/std::true_type) noexcept
1669 {
1670     _OutputIterator __res = __internal::__brick_copy(__middle, __last, __result, std::true_type());
1671     return __internal::__brick_copy(__first, __middle, __res, std::true_type());
1672 }
1673 
1674 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _IsVector>
1675 _OutputIterator
1676 __pattern_rotate_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
1677                       _OutputIterator __result, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
1678 {
1679     return __internal::__brick_rotate_copy(__first, __middle, __last, __result, __is_vector);
1680 }
1681 
1682 #if __PSTL_USE_PAR_POLICIES
1683 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _IsVector>
1684 _OutputIterator
1685 __pattern_rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle,
1686                       _ForwardIterator __last, _OutputIterator __result, _IsVector __is_vector,
1687                       /*is_parallel=*/std::true_type)
1688 {
1689     __par_backend::__parallel_for(
1690         std::forward<_ExecutionPolicy>(__exec), __first, __last,
1691         [__first, __last, __middle, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) {
1692             if (__b > __middle)
1693             {
1694                 __internal::__brick_copy(__b, __e, __result + (__b - __middle), __is_vector);
1695             }
1696             else
1697             {
1698                 _OutputIterator __new_result = __result + ((__last - __middle) + (__b - __first));
1699                 if (__e < __middle)
1700                 {
1701                     __internal::__brick_copy(__b, __e, __new_result, __is_vector);
1702                 }
1703                 else
1704                 {
1705                     __internal::__brick_copy(__b, __middle, __new_result, __is_vector);
1706                     __internal::__brick_copy(__middle, __e, __result, __is_vector);
1707                 }
1708             }
1709         });
1710     return __result + (__last - __first);
1711 }
1712 #endif
1713 
1714 //------------------------------------------------------------------------
1715 // is_partitioned
1716 //------------------------------------------------------------------------
1717 
1718 template <class _ForwardIterator, class _UnaryPredicate>
1719 bool
1720 __brick_is_partitioned(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
1721                        /*is_vector=*/std::false_type) noexcept
1722 {
1723     return std::is_partitioned(__first, __last, __pred);
1724 }
1725 
1726 template <class _ForwardIterator, class _UnaryPredicate>
1727 bool
1728 __brick_is_partitioned(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
1729                        /*is_vector=*/std::true_type) noexcept
1730 {
1731     typedef typename std::iterator_traits<_ForwardIterator>::difference_type _SizeType;
1732     if (__first == __last)
1733     {
1734         return true;
1735     }
1736     else
1737     {
1738         _ForwardIterator __result = __unseq_backend::__simd_first(
1739             __first, _SizeType(0), __last - __first,
1740             [&__pred](_ForwardIterator __it, _SizeType __i) { return !__pred(__it[__i]); });
1741         if (__result == __last)
1742         {
1743             return true;
1744         }
1745         else
1746         {
1747             ++__result;
1748             return !__unseq_backend::__simd_or(__result, __last - __result, __pred);
1749         }
1750     }
1751 }
1752 
1753 template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _IsVector>
1754 bool
1755 __pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
1756                          _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
1757 {
1758     return __internal::__brick_is_partitioned(__first, __last, __pred, __is_vector);
1759 }
1760 
1761 #if __PSTL_USE_PAR_POLICIES
1762 template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _IsVector>
1763 bool
1764 __pattern_is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
1765                          _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/std::true_type)
1766 {
1767     if (__first == __last)
1768     {
1769         return true;
1770     }
1771     else
1772     {
1773         return __internal::__except_handler([&]() {
1774             // State of current range:
1775             // broken     - current range is not partitioned by pred
1776             // all_true   - all elements in current range satisfy pred
1777             // all_false  - all elements in current range don't satisfy pred
1778             // true_false - elements satisfy pred are placed before elements that don't satisfy pred
1779             enum _ReduceType
1780             {
1781                 __not_init = -1,
1782                 __broken,
1783                 __all_true,
1784                 __all_false,
1785                 __true_false
1786             };
1787             _ReduceType __init = __not_init;
1788 
1789             // Array with states that we'll have when state from the left branch is merged with state from the right branch.
1790             // State is calculated by formula: new_state = table[left_state * 4 + right_state]
1791             _ReduceType __table[] = {__broken,     __broken,     __broken,     __broken, __broken,    __all_true,
1792                                      __true_false, __true_false, __broken,     __broken, __all_false, __broken,
1793                                      __broken,     __broken,     __true_false, __broken};
1794 
1795             __init = __par_backend::__parallel_reduce(
1796                 std::forward<_ExecutionPolicy>(__exec), __first, __last, __init,
1797                 [&__pred, &__table, __is_vector](_ForwardIterator __i, _ForwardIterator __j,
1798                                                  _ReduceType __value) -> _ReduceType {
1799                     if (__value == __broken)
1800                     {
1801                         return __broken;
1802                     }
1803                     _ReduceType __res = __not_init;
1804                     // if first element satisfy pred
1805                     if (__pred(*__i))
1806                     {
1807                         // find first element that don't satisfy pred
1808                         _ForwardIterator __x =
1809                             __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate>(__pred), __is_vector);
1810                         if (__x != __j)
1811                         {
1812                             // find first element after "x" that satisfy pred
1813                             _ForwardIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, __is_vector);
1814                             // if it was found then range isn't partitioned by pred
1815                             if (__y != __j)
1816                             {
1817                                 return __broken;
1818                             }
1819                             else
1820                             {
1821                                 __res = __true_false;
1822                             }
1823                         }
1824                         else
1825                         {
1826                             __res = __all_true;
1827                         }
1828                     }
1829                     else
1830                     { // if first element doesn't satisfy pred
1831                         // then we should find the first element that satisfy pred.
1832                         // If we found it then range isn't partitioned by pred
1833                         if (__internal::__brick_find_if(__i + 1, __j, __pred, __is_vector) != __j)
1834                         {
1835                             return __broken;
1836                         }
1837                         else
1838                         {
1839                             __res = __all_false;
1840                         }
1841                     }
1842                     // if we have value from left range then we should calculate the result
1843                     return (__value == -1) ? __res : __table[__value * 4 + __res];
1844                 },
1845 
1846                 [&__table](_ReduceType __val1, _ReduceType __val2) -> _ReduceType {
1847                     if (__val1 == __broken || __val2 == __broken)
1848                     {
1849                         return __broken;
1850                     }
1851                     // calculate the result for new big range
1852                     return __table[__val1 * 4 + __val2];
1853                 });
1854             return __init != __broken;
1855         });
1856     }
1857 }
1858 #endif
1859 
1860 //------------------------------------------------------------------------
1861 // partition
1862 //------------------------------------------------------------------------
1863 
1864 template <class _ForwardIterator, class _UnaryPredicate>
1865 _ForwardIterator
1866 __brick_partition(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
1867                   /*is_vector=*/std::false_type) noexcept
1868 {
1869     return std::partition(__first, __last, __pred);
1870 }
1871 
1872 template <class _ForwardIterator, class _UnaryPredicate>
1873 _ForwardIterator
1874 __brick_partition(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
1875                   /*is_vector=*/std::true_type) noexcept
1876 {
1877     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
1878     return std::partition(__first, __last, __pred);
1879 }
1880 
1881 template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _IsVector>
1882 _ForwardIterator
1883 __pattern_partition(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
1884                     _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
1885 {
1886     return __internal::__brick_partition(__first, __last, __pred, __is_vector);
1887 }
1888 
1889 #if __PSTL_USE_PAR_POLICIES
1890 template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _IsVector>
1891 _ForwardIterator
1892 __pattern_partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
1893                     _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/std::true_type)
1894 {
1895 
1896     // partitioned range: elements before pivot satisfy pred (true part),
1897     //                    elements after pivot don't satisfy pred (false part)
1898     struct _PartitionRange
1899     {
1900         _ForwardIterator __begin;
1901         _ForwardIterator __pivot;
1902         _ForwardIterator __end;
1903     };
1904 
1905     return __internal::__except_handler([&]() {
1906         _PartitionRange __init{__last, __last, __last};
1907 
1908         // lambda for merging two partitioned ranges to one partitioned range
1909         auto __reductor = [&__exec, __is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange {
1910             auto __size1 = __val1.__end - __val1.__pivot;
1911             auto __size2 = __val2.__pivot - __val2.__begin;
1912             auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin);
1913 
1914             // if all elements in left range satisfy pred then we can move new pivot to pivot of right range
1915             if (__val1.__end == __val1.__pivot)
1916             {
1917                 return {__new_begin, __val2.__pivot, __val2.__end};
1918             }
1919             // if true part of right range greater than false part of left range
1920             // then we should swap the false part of left range and last part of true part of right range
1921             else if (__size2 > __size1)
1922             {
1923                 __par_backend::__parallel_for(
1924                     std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1,
1925                     [__val1, __val2, __size1, __is_vector](_ForwardIterator __i, _ForwardIterator __j) {
1926                         __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), __is_vector);
1927                     });
1928                 return {__new_begin, __val2.__pivot - __size1, __val2.__end};
1929             }
1930             // else we should swap the first part of false part of left range and true part of right range
1931             else
1932             {
1933                 __par_backend::__parallel_for(
1934                     std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2,
1935                     [__val1, __val2, __is_vector](_ForwardIterator __i, _ForwardIterator __j) {
1936                         __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector);
1937                     });
1938                 return {__new_begin, __val1.__pivot + __size2, __val2.__end};
1939             }
1940         };
1941 
1942         _PartitionRange __result = __par_backend::__parallel_reduce(
1943             std::forward<_ExecutionPolicy>(__exec), __first, __last, __init,
1944             [__pred, __is_vector, __reductor](_ForwardIterator __i, _ForwardIterator __j,
1945                                               _PartitionRange __value) -> _PartitionRange {
1946                 //1. serial partition
1947                 _ForwardIterator __pivot = __internal::__brick_partition(__i, __j, __pred, __is_vector);
1948 
1949                 // 2. merging of two ranges (left and right respectively)
1950                 return __reductor(__value, {__i, __pivot, __j});
1951             },
1952             __reductor);
1953         return __result.__pivot;
1954     });
1955 }
1956 #endif
1957 
1958 //------------------------------------------------------------------------
1959 // stable_partition
1960 //------------------------------------------------------------------------
1961 
1962 template <class _BidirectionalIterator, class _UnaryPredicate>
1963 _BidirectionalIterator
1964 __brick_stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred,
1965                          /*__is_vector=*/std::false_type) noexcept
1966 {
1967     return std::stable_partition(__first, __last, __pred);
1968 }
1969 
1970 template <class _BidirectionalIterator, class _UnaryPredicate>
1971 _BidirectionalIterator
1972 __brick_stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred,
1973                          /*__is_vector=*/std::true_type) noexcept
1974 {
1975     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
1976     return std::stable_partition(__first, __last, __pred);
1977 }
1978 
1979 template <class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate, class _IsVector>
1980 _BidirectionalIterator
1981 __pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last,
1982                            _UnaryPredicate __pred, _IsVector __is_vector,
1983                            /*is_parallelization=*/std::false_type) noexcept
1984 {
1985     return __internal::__brick_stable_partition(__first, __last, __pred, __is_vector);
1986 }
1987 
1988 #if __PSTL_USE_PAR_POLICIES
1989 template <class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate, class _IsVector>
1990 _BidirectionalIterator
1991 __pattern_stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
1992                            _UnaryPredicate __pred, _IsVector __is_vector,
1993                            /*is_parallelization=*/std::true_type) noexcept
1994 {
1995     // partitioned range: elements before pivot satisfy pred (true part),
1996     //                    elements after pivot don't satisfy pred (false part)
1997     struct _PartitionRange
1998     {
1999         _BidirectionalIterator __begin;
2000         _BidirectionalIterator __pivot;
2001         _BidirectionalIterator __end;
2002     };
2003 
2004     return __internal::__except_handler([&]() {
2005         _PartitionRange __init{__last, __last, __last};
2006 
2007         // lambda for merging two partitioned ranges to one partitioned range
2008         auto __reductor = [__is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange {
2009             auto __size1 = __val1.__end - __val1.__pivot;
2010             auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin);
2011 
2012             // if all elements in left range satisfy pred then we can move new pivot to pivot of right range
2013             if (__val1.__end == __val1.__pivot)
2014             {
2015                 return {__new_begin, __val2.__pivot, __val2.__end};
2016             }
2017             // if true part of right range greater than false part of left range
2018             // then we should swap the false part of left range and last part of true part of right range
2019             else
2020             {
2021                 __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, __is_vector);
2022                 return {__new_begin, __val2.__pivot - __size1, __val2.__end};
2023             }
2024         };
2025 
2026         _PartitionRange __result = __par_backend::__parallel_reduce(
2027             std::forward<_ExecutionPolicy>(__exec), __first, __last, __init,
2028             [&__pred, __is_vector, __reductor](_BidirectionalIterator __i, _BidirectionalIterator __j,
2029                                                _PartitionRange __value) -> _PartitionRange {
2030                 //1. serial stable_partition
2031                 _BidirectionalIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, __is_vector);
2032 
2033                 // 2. merging of two ranges (left and right respectively)
2034                 return __reductor(__value, {__i, __pivot, __j});
2035             },
2036             __reductor);
2037         return __result.__pivot;
2038     });
2039 }
2040 #endif
2041 
2042 //------------------------------------------------------------------------
2043 // partition_copy
2044 //------------------------------------------------------------------------
2045 
2046 template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2, class _UnaryPredicate>
2047 std::pair<_OutputIterator1, _OutputIterator2>
2048 __brick_partition_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true,
2049                        _OutputIterator2 __out_false, _UnaryPredicate __pred, /*is_vector=*/std::false_type) noexcept
2050 {
2051     return std::partition_copy(__first, __last, __out_true, __out_false, __pred);
2052 }
2053 
2054 template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2, class _UnaryPredicate>
2055 std::pair<_OutputIterator1, _OutputIterator2>
2056 __brick_partition_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true,
2057                        _OutputIterator2 __out_false, _UnaryPredicate __pred, /*is_vector=*/std::true_type) noexcept
2058 {
2059 #if (__PSTL_MONOTONIC_PRESENT)
2060     return __unseq_backend::__simd_partition_copy(__first, __last - __first, __out_true, __out_false, __pred);
2061 #else
2062     return std::partition_copy(__first, __last, __out_true, __out_false, __pred);
2063 #endif
2064 }
2065 
2066 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator1, class _OutputIterator2,
2067           class _UnaryPredicate, class _IsVector>
2068 std::pair<_OutputIterator1, _OutputIterator2>
2069 __pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
2070                          _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred,
2071                          _IsVector __is_vector, /*is_parallelization=*/std::false_type) noexcept
2072 {
2073     return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector);
2074 }
2075 
2076 #if __PSTL_USE_PAR_POLICIES
2077 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator1, class _OutputIterator2,
2078           class _UnaryPredicate, class _IsVector>
2079 std::pair<_OutputIterator1, _OutputIterator2>
2080 __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
2081                          _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred,
2082                          _IsVector __is_vector, /*is_parallelization=*/std::true_type)
2083 {
2084     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
2085     typedef std::pair<_DifferenceType, _DifferenceType> _ReturnType;
2086     const _DifferenceType __n = __last - __first;
2087     if (_DifferenceType(1) < __n)
2088     {
2089         __par_backend::__buffer<bool> __mask_buf(__n);
2090         return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, &__mask_buf]() {
2091             bool* __mask = __mask_buf.get();
2092             _ReturnType __m{};
2093             __par_backend::__parallel_strict_scan(
2094                 std::forward<_ExecutionPolicy>(__exec), __n, std::make_pair(_DifferenceType(0), _DifferenceType(0)),
2095                 [=](_DifferenceType __i, _DifferenceType __len) { // Reduce
2096                     return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i,
2097                                                                 __pred, __is_vector);
2098                 },
2099                 [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType {
2100                     return std::make_pair(__x.first + __y.first, __x.second + __y.second);
2101                 },                                                                       // Combine
2102                 [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) { // Scan
2103                     __internal::__brick_partition_by_mask(__first + __i, __first + (__i + __len), __out_true + __initial.first,
2104                                               __out_false + __initial.second, __mask + __i, __is_vector);
2105                 },
2106                 [&__m](_ReturnType __total) { __m = __total; });
2107             return std::make_pair(__out_true + __m.first, __out_false + __m.second);
2108         });
2109     }
2110     // trivial sequence - use serial algorithm
2111     return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector);
2112 }
2113 #endif
2114 
2115 //------------------------------------------------------------------------
2116 // sort
2117 //------------------------------------------------------------------------
2118 
2119 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector,
2120           class _IsMoveConstructible>
2121 void
2122 __pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
2123                _IsVector /*is_vector*/, /*is_parallel=*/std::false_type, _IsMoveConstructible) noexcept
2124 {
2125     std::sort(__first, __last, __comp);
2126 }
2127 
2128 #if __PSTL_USE_PAR_POLICIES
2129 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2130 void
2131 __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
2132                _IsVector /*is_vector*/, /*is_parallel=*/std::true_type, /*is_move_constructible=*/std::true_type)
2133 {
2134     __internal::__except_handler([&]() {
2135         __par_backend::__parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp,
2136                                               [](_RandomAccessIterator __first, _RandomAccessIterator __last,
2137                                                  _Compare __comp) { std::sort(__first, __last, __comp); },
2138                                               __last - __first);
2139     });
2140 }
2141 #endif
2142 
2143 //------------------------------------------------------------------------
2144 // stable_sort
2145 //------------------------------------------------------------------------
2146 
2147 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2148 void
2149 __pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
2150                       _IsVector /*is_vector*/, /*is_parallel=*/std::false_type) noexcept
2151 {
2152     std::stable_sort(__first, __last, __comp);
2153 }
2154 
2155 #if __PSTL_USE_PAR_POLICIES
2156 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2157 void
2158 __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
2159                       _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/std::true_type)
2160 {
2161     __internal::__except_handler([&]() {
2162         __par_backend::__parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp,
2163                                               [](_RandomAccessIterator __first, _RandomAccessIterator __last,
2164                                                  _Compare __comp) { std::stable_sort(__first, __last, __comp); });
2165     });
2166 }
2167 #endif
2168 
2169 //------------------------------------------------------------------------
2170 // partial_sort
2171 //------------------------------------------------------------------------
2172 
2173 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2174 void
2175 __pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle,
2176                        _RandomAccessIterator __last, _Compare __comp, _IsVector,
2177                        /*is_parallel=*/std::false_type) noexcept
2178 {
2179     std::partial_sort(__first, __middle, __last, __comp);
2180 }
2181 
2182 #if __PSTL_USE_PAR_POLICIES
2183 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2184 void
2185 __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
2186                        _RandomAccessIterator __last, _Compare __comp, _IsVector, /*is_parallel=*/std::true_type)
2187 {
2188     const auto __n = __middle - __first;
2189     __internal::__except_handler([&]() {
2190         __par_backend::__parallel_stable_sort(
2191             std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp,
2192             [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) {
2193                 if (__n < __end - __begin)
2194                     std::partial_sort(__begin, __begin + __n, __end, __comp);
2195                 else
2196                     std::sort(__begin, __end, __comp);
2197             },
2198             __n);
2199     });
2200 }
2201 #endif
2202 
2203 //------------------------------------------------------------------------
2204 // partial_sort_copy
2205 //------------------------------------------------------------------------
2206 
2207 template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator, class _Compare, class _IsVector>
2208 _RandomAccessIterator
2209 __pattern_partial_sort_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
2210                             _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp, _IsVector,
2211                             /*is_parallel=*/std::false_type) noexcept
2212 {
2213     return std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp);
2214 }
2215 
2216 #if __PSTL_USE_PAR_POLICIES
2217 template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator, class _Compare, class _IsVector>
2218 _RandomAccessIterator
2219 __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
2220                             _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp,
2221                             _IsVector __is_vector, /*is_parallel=*/std::true_type)
2222 {
2223     if (__last == __first || __d_last == __d_first)
2224     {
2225         return __d_first;
2226     }
2227     auto __n1 = __last - __first;
2228     auto __n2 = __d_last - __d_first;
2229     return __internal::__except_handler([&]() {
2230         if (__n2 >= __n1)
2231         {
2232             __par_backend::__parallel_stable_sort(
2233                 std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp,
2234                 [__first, __d_first, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j,
2235                                                   _Compare __comp) {
2236                     _ForwardIterator __i1 = __first + (__i - __d_first);
2237                     _ForwardIterator __j1 = __first + (__j - __d_first);
2238 
2239                 // 1. Copy elements from input to output
2240 #if !__PSTL_ICC_18_OMP_SIMD_BROKEN
2241                     __internal::__brick_copy(__i1, __j1, __i, __is_vector);
2242 #else
2243                     std::copy(__i1, __j1, __i);
2244 #endif
2245                     // 2. Sort elements in output sequence
2246                     std::sort(__i, __j, __comp);
2247                 },
2248                 __n1);
2249             return __d_first + __n1;
2250         }
2251         else
2252         {
2253             typedef typename std::iterator_traits<_ForwardIterator>::value_type _T1;
2254             typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _T2;
2255             __par_backend::__buffer<_T1> __buf(__n1);
2256             _T1* __r = __buf.get();
2257 
2258             __par_backend::__parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp,
2259                                                   [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) {
2260                                                       _ForwardIterator __it = __first + (__i - __r);
2261 
2262                                                       // 1. Copy elements from input to raw memory
2263                                                       for (_T1* __k = __i; __k != __j; ++__k, ++__it)
2264                                                       {
2265                                                           ::new (__k) _T2(*__it);
2266                                                       }
2267 
2268                                                       // 2. Sort elements in temporary __buffer
2269                                                       if (__n2 < __j - __i)
2270                                                           std::partial_sort(__i, __i + __n2, __j, __comp);
2271                                                       else
2272                                                           std::sort(__i, __j, __comp);
2273                                                   },
2274                                                   __n2);
2275 
2276             // 3. Move elements from temporary __buffer to output
2277             __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2,
2278                                           [__r, __d_first, __is_vector](_T1* __i, _T1* __j) {
2279                                               __internal::__brick_move(__i, __j, __d_first + (__i - __r), __is_vector);
2280                                           });
2281             return __d_first + __n2;
2282         }
2283     });
2284 }
2285 #endif
2286 
2287 //------------------------------------------------------------------------
2288 // adjacent_find
2289 //------------------------------------------------------------------------
2290 template <class _ForwardIterator, class _BinaryPredicate>
2291 _ForwardIterator
2292 __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
2293                       /* IsVector = */ std::true_type, bool __or_semantic) noexcept
2294 {
2295     return __unseq_backend::__simd_adjacent_find(__first, __last, __pred, __or_semantic);
2296 }
2297 
2298 template <class _ForwardIterator, class _BinaryPredicate>
2299 _ForwardIterator
2300 __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
2301                       /* IsVector = */ std::false_type, bool __or_semantic) noexcept
2302 {
2303     return std::adjacent_find(__first, __last, __pred);
2304 }
2305 
2306 template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate, class _IsVector>
2307 _ForwardIterator
2308 __pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
2309                         /* is_parallel */ std::false_type, _IsVector __is_vector, bool __or_semantic) noexcept
2310 {
2311     return __internal::__brick_adjacent_find(__first, __last, __pred, __is_vector, __or_semantic);
2312 }
2313 
2314 #if __PSTL_USE_PAR_POLICIES
2315 template <class _ExecutionPolicy, class _RandomAccessIterator, class _BinaryPredicate, class _IsVector>
2316 _RandomAccessIterator
2317 __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
2318                         _BinaryPredicate __pred, /* is_parallel */ std::true_type, _IsVector __is_vector,
2319                         bool __or_semantic)
2320 {
2321     if (__last - __first < 2)
2322         return __last;
2323 
2324     return __internal::__except_handler([&]() {
2325         return __par_backend::__parallel_reduce(
2326             std::forward<_ExecutionPolicy>(__exec), __first, __last, __last,
2327             [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end,
2328                                                          _RandomAccessIterator __value) -> _RandomAccessIterator {
2329                 // TODO: investigate performance benefits from the use of shared variable for the result,
2330                 // checking (compare_and_swap idiom) its __value at __first.
2331                 if (__or_semantic && __value < __last)
2332                 { //found
2333                     __par_backend::__cancel_execution();
2334                     return __value;
2335                 }
2336 
2337                 if (__value > __begin)
2338                 {
2339                     // modify __end to check the predicate on the boundary __values;
2340                     // TODO: to use a custom range with boundaries overlapping
2341                     // TODO: investigate what if we remove "if" below and run algorithm on range [__first, __last-1)
2342                     // then check the pair [__last-1, __last)
2343                     if (__end != __last)
2344                         ++__end;
2345 
2346                     //correct the global result iterator if the "brick" returns a local "__last"
2347                     const _RandomAccessIterator __res =
2348                         __internal::__brick_adjacent_find(__begin, __end, __pred, __is_vector, __or_semantic);
2349                     if (__res < __end)
2350                         __value = __res;
2351                 }
2352                 return __value;
2353             },
2354             [](_RandomAccessIterator __x, _RandomAccessIterator __y) -> _RandomAccessIterator {
2355                 return __x < __y ? __x : __y;
2356             } //reduce a __value
2357         );
2358     });
2359 }
2360 #endif
2361 
2362 //------------------------------------------------------------------------
2363 // nth_element
2364 //------------------------------------------------------------------------
2365 
2366 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2367 void
2368 __pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth,
2369                       _RandomAccessIterator __last, _Compare __comp, _IsVector,
2370                       /*is_parallel=*/std::false_type) noexcept
2371 {
2372     std::nth_element(__first, __nth, __last, __comp);
2373 }
2374 
2375 #if __PSTL_USE_PAR_POLICIES
2376 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
2377 void
2378 __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
2379                       _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector,
2380                       /*is_parallel=*/std::true_type) noexcept
2381 {
2382     if (__first == __last || __nth == __last)
2383     {
2384         return;
2385     }
2386 
2387     using std::iter_swap;
2388     typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _Tp;
2389     _RandomAccessIterator __x;
2390     do
2391     {
2392         __x = __internal::__pattern_partition(std::forward<_ExecutionPolicy>(__exec), __first + 1, __last,
2393                                   [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, __is_vector,
2394                                   /*is_parallel=*/std::true_type());
2395         --__x;
2396         if (__x != __first)
2397         {
2398             iter_swap(__first, __x);
2399         }
2400         // if x > nth then our new range for partition is [first, x)
2401         if (__x - __nth > 0)
2402         {
2403             __last = __x;
2404         }
2405         // if x < nth then our new range for partition is [x, last)
2406         else if (__x - __nth < 0)
2407         {
2408             // if *x == *nth then we can start new partition with x+1
2409             if (!__comp(*__nth, *__x) && !__comp(*__x, *__nth))
2410             {
2411                 ++__x;
2412             }
2413             else
2414             {
2415                 iter_swap(__nth, __x);
2416             }
2417             __first = __x;
2418         }
2419     } while (__x != __nth);
2420 }
2421 #endif
2422 
2423 //------------------------------------------------------------------------
2424 // fill, fill_n
2425 //------------------------------------------------------------------------
2426 template <class _ForwardIterator, class _Tp>
2427 void
2428 __brick_fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value,
2429              /* __is_vector = */ std::true_type) noexcept
2430 {
2431     __unseq_backend::__simd_fill_n(__first, __last - __first, __value);
2432 }
2433 
2434 template <class _ForwardIterator, class _Tp>
2435 void
2436 __brick_fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value,
2437              /* __is_vector = */ std::false_type) noexcept
2438 {
2439     std::fill(__first, __last, __value);
2440 }
2441 
2442 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _IsVector>
2443 void
2444 __pattern_fill(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value,
2445                /*is_parallel=*/std::false_type, _IsVector __is_vector) noexcept
2446 {
2447     __internal::__brick_fill(__first, __last, __value, __is_vector);
2448 }
2449 
2450 #if __PSTL_USE_PAR_POLICIES
2451 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _IsVector>
2452 _ForwardIterator
2453 __pattern_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value,
2454                /*is_parallel=*/std::true_type, _IsVector __is_vector)
2455 {
2456     return __internal::__except_handler([&__exec, __first, __last, &__value, __is_vector]() {
2457         __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last,
2458                                       [&__value, __is_vector](_ForwardIterator __begin, _ForwardIterator __end) {
2459                                           __internal::__brick_fill(__begin, __end, __value, __is_vector);
2460                                       });
2461         return __last;
2462     });
2463 }
2464 #endif
2465 
2466 template <class _OutputIterator, class _Size, class _Tp>
2467 _OutputIterator
2468 __brick_fill_n(_OutputIterator __first, _Size __count, const _Tp& __value, /* __is_vector = */ std::true_type) noexcept
2469 {
2470     return __unseq_backend::__simd_fill_n(__first, __count, __value);
2471 }
2472 
2473 template <class _OutputIterator, class _Size, class _Tp>
2474 _OutputIterator
2475 __brick_fill_n(_OutputIterator __first, _Size __count, const _Tp& __value, /* __is_vector = */ std::false_type) noexcept
2476 {
2477     return std::fill_n(__first, __count, __value);
2478 }
2479 
2480 template <class _ExecutionPolicy, class _OutputIterator, class _Size, class _Tp, class _IsVector>
2481 _OutputIterator
2482 __pattern_fill_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value,
2483                  /*is_parallel=*/std::false_type, _IsVector __is_vector) noexcept
2484 {
2485     return __internal::__brick_fill_n(__first, __count, __value, __is_vector);
2486 }
2487 
2488 template <class _ExecutionPolicy, class _OutputIterator, class _Size, class _Tp, class _IsVector>
2489 _OutputIterator
2490 __pattern_fill_n(_ExecutionPolicy&& __exec, _OutputIterator __first, _Size __count, const _Tp& __value,
2491                  /*is_parallel=*/std::true_type, _IsVector __is_vector)
2492 {
2493     return __internal::__pattern_fill(std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value, std::true_type(),
2494                           __is_vector);
2495 }
2496 
2497 //------------------------------------------------------------------------
2498 // generate, generate_n
2499 //------------------------------------------------------------------------
2500 template <class _RandomAccessIterator, class _Generator>
2501 void
2502 __brick_generate(_RandomAccessIterator __first, _RandomAccessIterator __last, _Generator __g,
2503                  /* is_vector = */ std::true_type) noexcept
2504 {
2505     __unseq_backend::__simd_generate_n(__first, __last - __first, __g);
2506 }
2507 
2508 template <class _ForwardIterator, class _Generator>
2509 void
2510 __brick_generate(_ForwardIterator __first, _ForwardIterator __last, _Generator __g,
2511                  /* is_vector = */ std::false_type) noexcept
2512 {
2513     std::generate(__first, __last, __g);
2514 }
2515 
2516 template <class _ExecutionPolicy, class _ForwardIterator, class _Generator, class _IsVector>
2517 void
2518 __pattern_generate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g,
2519                    /*is_parallel=*/std::false_type, _IsVector __is_vector) noexcept
2520 {
2521     __internal::__brick_generate(__first, __last, __g, __is_vector);
2522 }
2523 
2524 #if __PSTL_USE_PAR_POLICIES
2525 template <class _ExecutionPolicy, class _ForwardIterator, class _Generator, class _IsVector>
2526 _ForwardIterator
2527 __pattern_generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g,
2528                    /*is_parallel=*/std::true_type, _IsVector __is_vector)
2529 {
2530     return __internal::__except_handler([&]() {
2531         __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last,
2532                                       [__g, __is_vector](_ForwardIterator __begin, _ForwardIterator __end) {
2533                                           __internal::__brick_generate(__begin, __end, __g, __is_vector);
2534                                       });
2535         return __last;
2536     });
2537 }
2538 #endif
2539 
2540 template <class OutputIterator, class Size, class _Generator>
2541 OutputIterator
2542 __brick_generate_n(OutputIterator __first, Size __count, _Generator __g, /* is_vector = */ std::true_type) noexcept
2543 {
2544     return __unseq_backend::__simd_generate_n(__first, __count, __g);
2545 }
2546 
2547 template <class OutputIterator, class Size, class _Generator>
2548 OutputIterator
2549 __brick_generate_n(OutputIterator __first, Size __count, _Generator __g, /* is_vector = */ std::false_type) noexcept
2550 {
2551     return std::generate_n(__first, __count, __g);
2552 }
2553 
2554 template <class _ExecutionPolicy, class _OutputIterator, class _Size, class _Generator, class _IsVector>
2555 _OutputIterator
2556 __pattern_generate_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g,
2557                      /*is_parallel=*/std::false_type, _IsVector __is_vector) noexcept
2558 {
2559     return __internal::__brick_generate_n(__first, __count, __g, __is_vector);
2560 }
2561 
2562 #if __PSTL_USE_PAR_POLICIES
2563 template <class _ExecutionPolicy, class _OutputIterator, class _Size, class _Generator, class _IsVector>
2564 _OutputIterator
2565 __pattern_generate_n(_ExecutionPolicy&& __exec, _OutputIterator __first, _Size __count, _Generator __g,
2566                      /*is_parallel=*/std::true_type, _IsVector __is_vector)
2567 {
2568     static_assert(__is_random_access_iterator<_OutputIterator>::value,
2569                   "Pattern-brick error. Should be a random access iterator.");
2570     return __internal::__pattern_generate(std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __g, std::true_type(),
2571                               __is_vector);
2572 }
2573 #endif
2574 
2575 //------------------------------------------------------------------------
2576 // remove
2577 //------------------------------------------------------------------------
2578 
2579 template <class _ForwardIterator, class _UnaryPredicate>
2580 _ForwardIterator
2581 __brick_remove_if(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
2582                   /* __is_vector = */ std::false_type) noexcept
2583 {
2584     return std::remove_if(__first, __last, __pred);
2585 }
2586 
2587 template <class _RandomAccessIterator, class _UnaryPredicate>
2588 _RandomAccessIterator
2589 __brick_remove_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred,
2590                   /* __is_vector = */ std::true_type) noexcept
2591 {
2592 #if __PSTL_MONOTONIC_PRESENT
2593     return __unseq_backend::__simd_remove_if(__first, __last - __first, __pred);
2594 #else
2595     return std::remove_if(__first, __last, __pred);
2596 #endif
2597 }
2598 
2599 template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _IsVector>
2600 _ForwardIterator
2601 __pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
2602                     _IsVector __is_vector, /*is_parallel*/ std::false_type) noexcept
2603 {
2604     return __internal::__brick_remove_if(__first, __last, __pred, __is_vector);
2605 }
2606 
2607 #if __PSTL_USE_PAR_POLICIES
2608 template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _IsVector>
2609 _ForwardIterator
2610 __pattern_remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
2611                     _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel*/ std::true_type) noexcept
2612 {
2613     typedef typename std::iterator_traits<_ForwardIterator>::reference _ReferenceType;
2614 
2615     if (__first == __last || __first + 1 == __last)
2616     {
2617         // Trivial sequence - use serial algorithm
2618         return __internal::__brick_remove_if(__first, __last, __pred, __is_vector);
2619     }
2620 
2621     return __internal::__remove_elements(std::forward<_ExecutionPolicy>(__exec), __first, __last,
2622                            [&__pred, __is_vector](bool* __b, bool* __e, _ForwardIterator __it) {
2623                                __internal::__brick_walk2(__b, __e, __it,
2624                                              [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); },
2625                                              __is_vector);
2626                            },
2627                            __is_vector);
2628 }
2629 #endif
2630 
2631 //------------------------------------------------------------------------
2632 // merge
2633 //------------------------------------------------------------------------
2634 
2635 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
2636 _OutputIterator
2637 __brick_merge(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
2638               _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp,
2639               /* __is_vector = */ std::false_type) noexcept
2640 {
2641     return std::merge(__first1, __last1, __first2, __last2, __d_first, __comp);
2642 }
2643 
2644 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
2645 _OutputIterator
2646 __brick_merge(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
2647               _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp,
2648               /* __is_vector = */ std::true_type) noexcept
2649 {
2650     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
2651     return std::merge(__first1, __last1, __first2, __last2, __d_first, __comp);
2652 }
2653 
2654 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
2655           class _Compare, class _IsVector>
2656 _OutputIterator
2657 __pattern_merge(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
2658                 _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp, _IsVector __is_vector,
2659                 /* is_parallel = */ std::false_type) noexcept
2660 {
2661     return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, __is_vector);
2662 }
2663 
2664 #if __PSTL_USE_PAR_POLICIES
2665 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _OutputIterator,
2666           class _Compare, class _IsVector>
2667 _OutputIterator
2668 __pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
2669                 _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __d_first,
2670                 _Compare __comp, _IsVector __is_vector, /* is_parallel = */ std::true_type)
2671 {
2672     __par_backend::__parallel_merge(
2673         std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp,
2674         [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2,
2675                       _RandomAccessIterator2 __l2, _OutputIterator __f3,
2676                       _Compare __comp) { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, __is_vector); });
2677     return __d_first + (__last1 - __first1) + (__last2 - __first2);
2678 }
2679 #endif
2680 
2681 //------------------------------------------------------------------------
2682 // inplace_merge
2683 //------------------------------------------------------------------------
2684 template <class _BidirectionalIterator, class _Compare>
2685 void
2686 __brick_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last,
2687                       _Compare __comp, /* __is_vector = */ std::false_type) noexcept
2688 {
2689     std::inplace_merge(__first, __middle, __last, __comp);
2690 }
2691 
2692 template <class _BidirectionalIterator, class _Compare>
2693 void
2694 __brick_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last,
2695                       _Compare __comp, /* __is_vector = */ std::true_type) noexcept
2696 {
2697     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial")
2698     std::inplace_merge(__first, __middle, __last, __comp);
2699 }
2700 
2701 template <class _ExecutionPolicy, class _BidirectionalIterator, class _Compare, class _IsVector>
2702 void
2703 __pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle,
2704                         _BidirectionalIterator __last, _Compare __comp, _IsVector __is_vector,
2705                         /* is_parallel = */ std::false_type) noexcept
2706 {
2707     __internal::__brick_inplace_merge(__first, __middle, __last, __comp, __is_vector);
2708 }
2709 
2710 #if __PSTL_USE_PAR_POLICIES
2711 template <class _ExecutionPolicy, class _BidirectionalIterator, class _Compare, class _IsVector>
2712 void
2713 __pattern_inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
2714                         _BidirectionalIterator __last, _Compare __comp, _IsVector __is_vector,
2715                         /*is_parallel=*/std::true_type)
2716 {
2717     if (__first == __last || __first == __middle || __middle == __last)
2718     {
2719         return;
2720     }
2721     typedef typename std::iterator_traits<_BidirectionalIterator>::value_type _Tp;
2722     auto __n = __last - __first;
2723     __par_backend::__buffer<_Tp> __buf(__n);
2724     _Tp* __r = __buf.get();
2725     __internal::__except_handler([&]() {
2726         auto __move_values = [](_BidirectionalIterator __x, _Tp* __z) {
2727                                  __internal::__invoke_if_else(std::is_trivial<_Tp>(), [&]() { *__z = std::move(*__x); },
2728                                                               [&]() { ::new (std::addressof(*__z)) _Tp(std::move(*__x)); });
2729         };
2730 
2731         auto __move_sequences = [](_BidirectionalIterator __first1, _BidirectionalIterator __last1, _Tp* __first2) {
2732             return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector());
2733         };
2734 
2735         __par_backend::__parallel_merge(
2736             std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp,
2737             [__n, __move_values, __move_sequences](_BidirectionalIterator __f1, _BidirectionalIterator __l1,
2738                                                    _BidirectionalIterator __f2, _BidirectionalIterator __l2, _Tp* __f3,
2739                                                    _Compare __comp) {
2740                 auto __func = __par_backend::__serial_move_merge<decltype(__move_values), decltype(__move_sequences)>(
2741                     __n, __move_values, __move_sequences);
2742                 __func(__f1, __l1, __f2, __l2, __f3, __comp);
2743                 return __f3 + (__l1 - __f1) + (__l2 - __f2);
2744             });
2745 
2746         __par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n,
2747                                       [__r, __first, __is_vector](_Tp* __i, _Tp* __j) {
2748                                           __internal::__brick_move(__i, __j, __first + (__i - __r), __is_vector);
2749                                       });
2750     });
2751 }
2752 #endif
2753 
2754 //------------------------------------------------------------------------
2755 // includes
2756 //------------------------------------------------------------------------
2757 
2758 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare, class _IsVector>
2759 bool
2760 __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
2761                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector,
2762                    /*is_parallel=*/std::false_type) noexcept
2763 {
2764     return std::includes(__first1, __last1, __first2, __last2, __comp);
2765 }
2766 
2767 #if __PSTL_USE_PAR_POLICIES
2768 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare, class _IsVector>
2769 bool
2770 __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
2771                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector __is_vector,
2772                    /*is_parallel=*/std::true_type)
2773 {
2774     if (__first2 >= __last2)
2775         return true;
2776 
2777     if (__first1 >= __last1 || __comp(*__first2, *__first1) || __comp(*(__last1 - 1), *(__last2 - 1)))
2778         return false;
2779 
2780     __first1 = std::lower_bound(__first1, __last1, *__first2, __comp);
2781     if (__first1 == __last1)
2782         return false;
2783 
2784     if (__last2 - __first2 == 1)
2785         return !__comp(*__first1, *__first2) && !__comp(*__first2, *__first1);
2786 
2787     return __internal::__except_handler([&]() {
2788          return !__internal::__parallel_or(
2789             std::forward<_ExecutionPolicy>(__exec), __first2, __last2,
2790             [__first1, __last1, __first2, __last2, &__comp](_ForwardIterator2 __i, _ForwardIterator2 __j) {
2791                 __PSTL_ASSERT(__j > __i);
2792                 //__PSTL_ASSERT(__j - __i > 1);
2793 
2794                 //1. moving boundaries to "consume" subsequence of equal elements
2795                 auto __is_equal = [&__comp](_ForwardIterator2 __a, _ForwardIterator2 __b) -> bool {
2796                     return !__comp(*__a, *__b) && !__comp(*__b, *__a);
2797                 };
2798 
2799                 //1.1 left bound, case "aaa[aaaxyz...]" - searching "x"
2800                 if (__i > __first2 && __is_equal(__i, __i - 1))
2801                 {
2802                     //whole subrange continues to content equal elements - return "no op"
2803                     if (__is_equal(__i, __j - 1))
2804                         return false;
2805 
2806                     __i = std::upper_bound(__i, __last2, *__i, __comp);
2807                 }
2808 
2809                 //1.2 right bound, case "[...aaa]aaaxyz" - searching "x"
2810                 if (__j < __last2 && __is_equal(__j - 1, __j))
2811                     __j = std::upper_bound(__j, __last2, *__j, __comp);
2812 
2813                 //2. testing is __a subsequence of the second range included into the first range
2814                 auto __b = std::lower_bound(__first1, __last1, *__i, __comp);
2815 
2816                 __PSTL_ASSERT(!__comp(*(__last1 - 1), *__b));
2817                 __PSTL_ASSERT(!__comp(*(__j - 1), *__i));
2818                 return !std::includes(__b, __last1, __i, __j, __comp);
2819             });
2820     });
2821 }
2822 #endif
2823 
2824 constexpr auto __set_algo_cut_off = 1000;
2825 
2826 #if __PSTL_USE_PAR_POLICIES
2827 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
2828           class _Compare, class _IsVector, class _SizeFunction, class _SetOP>
2829 _OutputIterator
2830 __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
2831                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
2832                   _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector)
2833 {
2834     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
2835     typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
2836 
2837     struct _SetRange
2838     {
2839         _DifferenceType __pos, __len, __buf_pos;
2840         bool
2841         empty() const
2842         {
2843             return __len == 0;
2844         }
2845     };
2846 
2847     const _DifferenceType __n1 = __last1 - __first1;
2848     const _DifferenceType __n2 = __last2 - __first2;
2849 
2850     __par_backend::__buffer<_T> __buf(__size_func(__n1, __n2));
2851 
2852     return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __is_vector, __comp,
2853                              __size_func, __set_op, &__buf]() {
2854         auto __buffer = __buf.get();
2855         _DifferenceType __m{};
2856         auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan
2857             if (!__s.empty())
2858                 __internal::__brick_move(__buffer + __s.__buf_pos, __buffer + (__s.__buf_pos + __s.__len), __result + __s.__pos,
2859                              __is_vector);
2860         };
2861         __par_backend::__parallel_strict_scan(
2862             std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0},
2863             [=](_DifferenceType __i, _DifferenceType __len) {                 // Reduce
2864                 //[__b; __e) - a subrange of the first sequence, to reduce
2865                 _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len);
2866 
2867                 //try searching for the first element which not equal to *__b
2868                 if (__b != __first1)
2869                     __b = std::upper_bound(__b, __last1, *__b, __comp);
2870 
2871                 //try searching for the first element which not equal to *__e
2872                 if (__e != __last1)
2873                     __e = std::upper_bound(__e, __last1, *__e, __comp);
2874 
2875                 //check is [__b; __e) empty
2876                 if (__e - __b < 1)
2877                 {
2878                     _ForwardIterator2 __bb = __last2;
2879                     if (__b != __last1)
2880                         __bb = std::lower_bound(__first2, __last2, *__b, __comp);
2881 
2882                     const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2));
2883                     return _SetRange{0, 0, __buf_pos};
2884                 }
2885 
2886                 //try searching for "corresponding" subrange [__bb; __ee) in the second sequence
2887                 _ForwardIterator2 __bb = __first2;
2888                 if (__b != __first1)
2889                     __bb = std::lower_bound(__first2, __last2, *__b, __comp);
2890 
2891                 _ForwardIterator2 __ee = __last2;
2892                 if (__e != __last1)
2893                     __ee = std::lower_bound(__bb, __last2, *__e, __comp);
2894 
2895                 const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2));
2896                 auto __buffer_b = __buffer + __buf_pos;
2897                 auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp);
2898 
2899                 return _SetRange{0, __res - __buffer_b, __buf_pos};
2900             },
2901             [](const _SetRange& __a, const _SetRange& __b) { // Combine
2902                 if (__b.__buf_pos > __a.__buf_pos || ((__b.__buf_pos == __a.__buf_pos) && !__b.empty()))
2903                     return _SetRange{__a.__pos + __a.__len + __b.__pos, __b.__len, __b.__buf_pos};
2904                 return _SetRange{__b.__pos + __b.__len + __a.__pos, __a.__len, __a.__buf_pos};
2905             },
2906             __scan,                                     // Scan
2907             [&__m, &__scan](const _SetRange& __total) { // Apex
2908                 //final scan
2909                 __scan(0, 0, __total);
2910                 __m = __total.__pos + __total.__len;
2911             });
2912         return __result + __m;
2913     });
2914 }
2915 #endif
2916 
2917 #if __PSTL_USE_PAR_POLICIES
2918 //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference'
2919 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
2920           class _Compare, class _SetUnionOp, class _IsVector>
2921 _OutputIterator
2922 __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
2923                       _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
2924                       _SetUnionOp __set_union_op, _IsVector __is_vector)
2925 {
2926     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
2927 
2928     const auto __n1 = __last1 - __first1;
2929     const auto __n2 = __last2 - __first2;
2930 
2931     auto copy_range1 = [__is_vector](_ForwardIterator1 __begin, _ForwardIterator1 __end, _OutputIterator __res) {
2932         return __internal::__brick_copy(__begin, __end, __res, __is_vector);
2933     };
2934     auto copy_range2 = [__is_vector](_ForwardIterator2 __begin, _ForwardIterator2 __end, _OutputIterator __res) {
2935         return __internal::__brick_copy(__begin, __end, __res, __is_vector);
2936     };
2937 
2938     // {1} {}: parallel copying just first sequence
2939     if (__n2 == 0)
2940         return __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, copy_range1,
2941                                      std::true_type());
2942 
2943     // {} {2}: parallel copying justmake  second sequence
2944     if (__n1 == 0)
2945         return __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, copy_range2,
2946                                      std::true_type());
2947 
2948     // testing  whether the sequences are intersected
2949     _ForwardIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp);
2950 
2951     if (__left_bound_seq_1 == __last1)
2952     {
2953         //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2
2954         __par_backend::__parallel_invoke(std::forward<_ExecutionPolicy>(__exec),
2955                                          [=] {
2956                                              __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1,
2957                                                                    __last1, __result, copy_range1, std::true_type());
2958                                          },
2959                                          [=] {
2960                                              __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2,
2961                                                                    __last2, __result + __n1, copy_range2,
2962                                                                    std::true_type());
2963                                          });
2964         return __result + __n1 + __n2;
2965     }
2966 
2967     // testing  whether the sequences are intersected
2968     _ForwardIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp);
2969 
2970     if (__left_bound_seq_2 == __last2)
2971     {
2972         //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2
2973         __par_backend::__parallel_invoke(std::forward<_ExecutionPolicy>(__exec),
2974                                          [=] {
2975                                              __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2,
2976                                                                    __last2, __result, copy_range2, std::true_type());
2977                                          },
2978                                          [=] {
2979                                              __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1,
2980                                                                    __last1, __result + __n2, copy_range1,
2981                                                                    std::true_type());
2982                                          });
2983         return __result + __n1 + __n2;
2984     }
2985 
2986     const auto __m1 = __left_bound_seq_1 - __first1;
2987     if (__m1 > __set_algo_cut_off)
2988     {
2989         auto __res_or = __result;
2990         __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2)
2991         __par_backend::__parallel_invoke(
2992             std::forward<_ExecutionPolicy>(__exec),
2993             //do parallel copying of [first1; left_bound_seq_1)
2994             [=] {
2995                 __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1, __left_bound_seq_1, __res_or,
2996                                       copy_range1, std::true_type());
2997             },
2998             [=, &__result] {
2999                 __result = __internal::__parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1,
3000                                              __first2, __last2, __result, __comp,
3001                                              [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; },
3002                                              __set_union_op, __is_vector);
3003             });
3004         return __result;
3005     }
3006 
3007     const auto __m2 = __left_bound_seq_2 - __first2;
3008     __PSTL_ASSERT(__m1 == 0 || __m2 == 0);
3009     if (__m2 > __set_algo_cut_off)
3010     {
3011         auto __res_or = __result;
3012         __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1)
3013         __par_backend::__parallel_invoke(
3014             std::forward<_ExecutionPolicy>(__exec),
3015             //do parallel copying of [first2; left_bound_seq_2)
3016             [=] {
3017                 __internal::__pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2, __left_bound_seq_2, __res_or,
3018                                       copy_range2, std::true_type());
3019             },
3020             [=, &__result] {
3021                 __result = __internal::__parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
3022                                              __left_bound_seq_2, __last2, __result, __comp,
3023                                              [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; },
3024                                              __set_union_op, __is_vector);
3025             });
3026         return __result;
3027     }
3028 
3029     return __internal::__parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
3030                              __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op,
3031                              __is_vector);
3032 }
3033 #endif
3034 
3035 //------------------------------------------------------------------------
3036 // set_union
3037 //------------------------------------------------------------------------
3038 
3039 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3040 _OutputIterator
3041 __brick_set_union(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3042                   _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3043                   /*__is_vector=*/std::false_type) noexcept
3044 {
3045     return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
3046 }
3047 
3048 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3049 _OutputIterator
3050 __brick_set_union(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3051                   _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3052                   /*__is_vector=*/std::true_type) noexcept
3053 {
3054     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
3055     return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
3056 }
3057 
3058 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3059           class _Compare, class _IsVector>
3060 _OutputIterator
3061 __pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3062                     _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3063                     _IsVector __is_vector,
3064                     /*is_parallel=*/std::false_type) noexcept
3065 {
3066     return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, __is_vector);
3067 }
3068 
3069 #if __PSTL_USE_PAR_POLICIES
3070 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3071           class _Compare, class _IsVector>
3072 _OutputIterator
3073 __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3074                     _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3075                     _IsVector __is_vector, /*__is_parallel=*/std::true_type)
3076 {
3077 
3078     const auto __n1 = __last1 - __first1;
3079     const auto __n2 = __last2 - __first2;
3080 
3081     // use serial algorithm
3082     if (__n1 + __n2 <= __set_algo_cut_off)
3083         return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
3084 
3085     typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
3086     return __internal::__parallel_set_union_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
3087                                  __comp,
3088                                  [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3089                                     _ForwardIterator2 __last2, _T* __result, _Compare __comp) {
3090                                      return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
3091                                  },
3092                                  __is_vector);
3093 }
3094 #endif
3095 
3096 //------------------------------------------------------------------------
3097 // set_intersection
3098 //------------------------------------------------------------------------
3099 
3100 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3101 _OutputIterator
3102 __brick_set_intersection(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3103                          _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3104                          /*__is_vector=*/std::false_type) noexcept
3105 {
3106     return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp);
3107 }
3108 
3109 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3110 _OutputIterator
3111 __brick_set_intersection(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3112                          _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3113                          /*__is_vector=*/std::true_type) noexcept
3114 {
3115     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
3116     return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp);
3117 }
3118 
3119 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3120           class _Compare, class _IsVector>
3121 _OutputIterator
3122 __pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3123                            _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
3124                            _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
3125 {
3126     return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, __is_vector);
3127 }
3128 
3129 #if __PSTL_USE_PAR_POLICIES
3130 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3131           class _Compare, class _IsVector>
3132 _OutputIterator
3133 __pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3134                            _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
3135                            _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::true_type)
3136 {
3137     typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
3138     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
3139 
3140     const auto __n1 = __last1 - __first1;
3141     const auto __n2 = __last2 - __first2;
3142 
3143     // intersection is empty
3144     if (__n1 == 0 || __n2 == 0)
3145         return __result;
3146 
3147     // testing  whether the sequences are intersected
3148     _ForwardIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp);
3149     //{1} < {2}: seq 2 is wholly greater than seq 1, so, the intersection is empty
3150     if (__left_bound_seq_1 == __last1)
3151         return __result;
3152 
3153     // testing  whether the sequences are intersected
3154     _ForwardIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp);
3155     //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty
3156     if (__left_bound_seq_2 == __last2)
3157         return __result;
3158 
3159     const auto __m1 = __last1 - __left_bound_seq_1 + __n2;
3160     if (__m1 > __set_algo_cut_off)
3161     {
3162         //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2)
3163         return __internal::__parallel_set_op(
3164             std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp,
3165             [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); },
3166             [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3167                _ForwardIterator2 __last2, _T* __result, _Compare __comp) {
3168                 return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp);
3169             },
3170             __is_vector);
3171     }
3172 
3173     const auto __m2 = __last2 - __left_bound_seq_2 + __n1;
3174     if (__m2 > __set_algo_cut_off)
3175     {
3176         //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1)
3177         __result = __internal::__parallel_set_op(
3178             std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp,
3179             [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); },
3180             [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3181                _ForwardIterator2 __last2, _T* __result, _Compare __comp) {
3182                 return std::set_intersection(__first2, __last2, __first1, __last1, __result, __comp);
3183             },
3184             __is_vector);
3185         return __result;
3186     }
3187 
3188     // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm
3189     return std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp);
3190 }
3191 #endif
3192 
3193 //------------------------------------------------------------------------
3194 // set_difference
3195 //------------------------------------------------------------------------
3196 
3197 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3198 _OutputIterator
3199 __brick_set_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3200                        _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3201                        /*__is_vector=*/std::false_type) noexcept
3202 {
3203     return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
3204 }
3205 
3206 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3207 _OutputIterator
3208 __brick_set_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3209                        _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3210                        /*__is_vector=*/std::true_type) noexcept
3211 {
3212     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
3213     return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
3214 }
3215 
3216 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3217           class _Compare, class _IsVector>
3218 _OutputIterator
3219 __pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3220                          _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
3221                          _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
3222 {
3223     return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, __is_vector);
3224 }
3225 
3226 #if __PSTL_USE_PAR_POLICIES
3227 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3228           class _Compare, class _IsVector>
3229 _OutputIterator
3230 __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3231                          _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
3232                          _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::true_type)
3233 {
3234     typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
3235     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
3236 
3237     const auto __n1 = __last1 - __first1;
3238     const auto __n2 = __last2 - __first2;
3239 
3240     // {} \ {2}: the difference is empty
3241     if (__n1 == 0)
3242         return __result;
3243 
3244     // {1} \ {}: parallel copying just first sequence
3245     if (__n2 == 0)
3246         return __internal::__pattern_walk2_brick(
3247             std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result,
3248             [__is_vector](_ForwardIterator1 __begin, _ForwardIterator1 __end, _OutputIterator __res) {
3249                 return __internal::__brick_copy(__begin, __end, __res, __is_vector);
3250             },
3251             std::true_type());
3252 
3253     // testing  whether the sequences are intersected
3254     _ForwardIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp);
3255     //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence
3256     if (__left_bound_seq_1 == __last1)
3257         return __internal::__pattern_walk2_brick(
3258             std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result,
3259             [__is_vector](_ForwardIterator1 __begin, _ForwardIterator1 __end, _OutputIterator __res) {
3260                 return __internal::__brick_copy(__begin, __end, __res, __is_vector);
3261             },
3262             std::true_type());
3263 
3264     // testing  whether the sequences are intersected
3265     _ForwardIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp);
3266     //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence
3267     if (__left_bound_seq_2 == __last2)
3268         return __internal::__pattern_walk2_brick(
3269             std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result,
3270             [__is_vector](_ForwardIterator1 __begin, _ForwardIterator1 __end, _OutputIterator __res) {
3271                 return __internal::__brick_copy(__begin, __end, __res, __is_vector);
3272             },
3273             std::true_type());
3274 
3275     if (__n1 + __n2 > __set_algo_cut_off)
3276         return __internal::__parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
3277                                  __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n; },
3278                                  [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3279                                     _ForwardIterator2 __last2, _T* __result, _Compare __comp) {
3280                                      return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
3281                                  },
3282                                  __is_vector);
3283 
3284     // use serial algorithm
3285     return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
3286 }
3287 #endif
3288 
3289 //------------------------------------------------------------------------
3290 // set_symmetric_difference
3291 //------------------------------------------------------------------------
3292 
3293 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3294 _OutputIterator
3295 __brick_set_symmetric_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3296                                  _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3297                                  /*__is_vector=*/std::false_type) noexcept
3298 {
3299     return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
3300 }
3301 
3302 template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
3303 _OutputIterator
3304 __brick_set_symmetric_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3305                                  _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
3306                                  /*__is_vector=*/std::true_type) noexcept
3307 {
3308     __PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
3309     return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
3310 }
3311 
3312 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3313           class _Compare, class _IsVector>
3314 _OutputIterator
3315 __pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3316                                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
3317                                    _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
3318 {
3319     return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp, __is_vector);
3320 }
3321 
3322 #if __PSTL_USE_PAR_POLICIES
3323 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
3324           class _Compare, class _IsVector>
3325 _OutputIterator
3326 __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3327                                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
3328                                    _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::true_type)
3329 {
3330 
3331     const auto __n1 = __last1 - __first1;
3332     const auto __n2 = __last2 - __first2;
3333 
3334     // use serial algorithm
3335     if (__n1 + __n2 <= __set_algo_cut_off)
3336         return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
3337 
3338     typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
3339     return __internal::__parallel_set_union_op(
3340         std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
3341         [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
3342            _T* __result, _Compare __comp) {
3343             return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
3344         },
3345         __is_vector);
3346 }
3347 #endif
3348 
3349 //------------------------------------------------------------------------
3350 // is_heap_until
3351 //------------------------------------------------------------------------
3352 
3353 template <class _RandomAccessIterator, class _Compare>
3354 _RandomAccessIterator
3355 __brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
3356                       /* __is_vector = */ std::false_type) noexcept
3357 {
3358     return std::is_heap_until(__first, __last, __comp);
3359 }
3360 
3361 template <class _RandomAccessIterator, class _Compare>
3362 _RandomAccessIterator
3363 __brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
3364                       /* __is_vector = */ std::true_type) noexcept
3365 {
3366     if (__last - __first < 2)
3367         return __last;
3368     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType;
3369     return __unseq_backend::__simd_first(
3370         __first, _SizeType(0), __last - __first,
3371         [&__comp](_RandomAccessIterator __it, _SizeType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); });
3372 }
3373 
3374 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
3375 _RandomAccessIterator
3376 __pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last,
3377                         _Compare __comp, _IsVector __is_vector, /* is_parallel = */ std::false_type) noexcept
3378 {
3379     return __internal::__brick_is_heap_until(__first, __last, __comp, __is_vector);
3380 }
3381 
3382 template <class _RandomAccessIterator, class _DifferenceType, class _Compare>
3383 _RandomAccessIterator
3384 __is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp,
3385                     /* __is_vector = */ std::false_type) noexcept
3386 {
3387     _DifferenceType __i = __begin;
3388     for (; __i < __end; ++__i)
3389     {
3390         if (__comp(__first[(__i - 1) / 2], __first[__i]))
3391         {
3392             break;
3393         }
3394     }
3395     return __first + __i;
3396 }
3397 
3398 template <class _RandomAccessIterator, class _DifferenceType, class _Compare>
3399 _RandomAccessIterator
3400 __is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp,
3401                     /* __is_vector = */ std::true_type) noexcept
3402 {
3403     return __unseq_backend::__simd_first(
3404         __first, __begin, __end,
3405         [&__comp](_RandomAccessIterator __it, _DifferenceType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); });
3406 }
3407 
3408 #if __PSTL_USE_PAR_POLICIES
3409 template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsVector>
3410 _RandomAccessIterator
3411 __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
3412                         _Compare __comp, _IsVector __is_vector, /* is_parallel = */ std::true_type) noexcept
3413 {
3414     if (__last - __first < 2)
3415         return __last;
3416 
3417     return __internal::__except_handler([&]() {
3418         return __parallel_find(
3419             std::forward<_ExecutionPolicy>(__exec), __first, __last,
3420             [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) {
3421                 return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, __is_vector);
3422             },
3423             std::less<typename std::iterator_traits<_RandomAccessIterator>::difference_type>(), /*is_first=*/true);
3424     });
3425 }
3426 #endif
3427 
3428 //------------------------------------------------------------------------
3429 // min_element
3430 //------------------------------------------------------------------------
3431 
3432 template <typename _ForwardIterator, typename _Compare>
3433 _ForwardIterator
3434 __brick_min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3435                     /* __is_vector = */ std::false_type) noexcept
3436 {
3437     return std::min_element(__first, __last, __comp);
3438 }
3439 
3440 template <typename _ForwardIterator, typename _Compare>
3441 _ForwardIterator
3442 __brick_min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3443                     /* __is_vector = */ std::true_type) noexcept
3444 {
3445 #if __PSTL_UDR_PRESENT
3446     return __unseq_backend::__simd_min_element(__first, __last - __first, __comp);
3447 #else
3448     return std::min_element(__first, __last, __comp);
3449 #endif
3450 }
3451 
3452 template <typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare, typename _IsVector>
3453 _ForwardIterator
3454 __pattern_min_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3455                       _IsVector __is_vector, /* is_parallel = */ std::false_type) noexcept
3456 {
3457     return __internal::__brick_min_element(__first, __last, __comp, __is_vector);
3458 }
3459 
3460 #if __PSTL_USE_PAR_POLICIES
3461 template <typename _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _IsVector>
3462 _RandomAccessIterator
3463 __pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
3464                       _Compare __comp, _IsVector __is_vector, /* is_parallel = */ std::true_type)
3465 {
3466     if (__first == __last)
3467         return __last;
3468 
3469     return __internal::__except_handler([&]() {
3470         return __par_backend::__parallel_reduce(
3471             std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, __first,
3472             [=](_RandomAccessIterator __begin, _RandomAccessIterator __end,
3473                 _RandomAccessIterator __init) -> _RandomAccessIterator {
3474                 const _RandomAccessIterator subresult = __internal::__brick_min_element(__begin, __end, __comp, __is_vector);
3475                 return __internal::__cmp_iterators_by_values(__init, subresult, __comp);
3476             },
3477             [=](_RandomAccessIterator __it1, _RandomAccessIterator __it2) -> _RandomAccessIterator {
3478                 return __internal::__cmp_iterators_by_values(__it1, __it2, __comp);
3479             });
3480     });
3481 }
3482 #endif
3483 
3484 //------------------------------------------------------------------------
3485 // minmax_element
3486 //------------------------------------------------------------------------
3487 
3488 template <typename _ForwardIterator, typename _Compare>
3489 std::pair<_ForwardIterator, _ForwardIterator>
3490 __brick_minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3491                        /* __is_vector = */ std::false_type) noexcept
3492 {
3493     return std::minmax_element(__first, __last, __comp);
3494 }
3495 
3496 template <typename _ForwardIterator, typename _Compare>
3497 std::pair<_ForwardIterator, _ForwardIterator>
3498 __brick_minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3499                        /* __is_vector = */ std::true_type) noexcept
3500 {
3501 #if __PSTL_UDR_PRESENT
3502     return __unseq_backend::__simd_minmax_element(__first, __last - __first, __comp);
3503 #else
3504     return std::minmax_element(__first, __last, __comp);
3505 #endif
3506 }
3507 
3508 template <typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare, typename _IsVector>
3509 std::pair<_ForwardIterator, _ForwardIterator>
3510 __pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3511                          _IsVector __is_vector, /* is_parallel = */ std::false_type) noexcept
3512 {
3513     return __internal::__brick_minmax_element(__first, __last, __comp, __is_vector);
3514 }
3515 
3516 #if __PSTL_USE_PAR_POLICIES
3517 template <typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare, typename _IsVector>
3518 std::pair<_ForwardIterator, _ForwardIterator>
3519 __pattern_minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
3520                          _IsVector __is_vector, /* is_parallel = */ std::true_type)
3521 {
3522     if (__first == __last)
3523         return std::make_pair(__first, __first);
3524 
3525     return __internal::__except_handler([&]() {
3526         typedef std::pair<_ForwardIterator, _ForwardIterator> _Result;
3527 
3528         return __par_backend::__parallel_reduce(
3529             std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, std::make_pair(__first, __first),
3530             [=](_ForwardIterator __begin, _ForwardIterator __end, _Result __init) -> _Result {
3531                 const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, __is_vector);
3532                 return std::make_pair(
3533                     __internal::__cmp_iterators_by_values(__subresult.first, __init.first, __comp),
3534                     __internal::__cmp_iterators_by_values(__init.second, __subresult.second, __not_pred<_Compare>(__comp)));
3535             },
3536             [=](_Result __p1, _Result __p2) -> _Result {
3537                 return std::make_pair(
3538                     __internal::__cmp_iterators_by_values(__p1.first, __p2.first, __comp),
3539                     __internal::__cmp_iterators_by_values(__p2.second, __p1.second, __not_pred<_Compare>(__comp)));
3540             });
3541     });
3542 }
3543 #endif
3544 
3545 //------------------------------------------------------------------------
3546 // mismatch
3547 //------------------------------------------------------------------------
3548 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
3549 std::pair<_ForwardIterator1, _ForwardIterator2>
3550 __mismatch_serial(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3551                   _ForwardIterator2 __last2, _BinaryPredicate __pred)
3552 {
3553 #if __PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT
3554     return std::mismatch(__first1, __last1, __first2, __last2, __pred);
3555 #else
3556     for (; __first1 != __last1 && __first2 != __last2 && __pred(*__first1, *__first2); ++__first1, ++__first2)
3557     {
3558     }
3559     return std::make_pair(__first1, __first2);
3560 #endif
3561 }
3562 
3563 template <class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
3564 std::pair<_ForwardIterator1, _ForwardIterator2>
3565 __brick_mismatch(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3566                  _ForwardIterator2 __last2, _Predicate __pred, /* __is_vector = */ std::false_type) noexcept
3567 {
3568     return __mismatch_serial(__first1, __last1, __first2, __last2, __pred);
3569 }
3570 
3571 template <class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
3572 std::pair<_ForwardIterator1, _ForwardIterator2>
3573 __brick_mismatch(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3574                  _ForwardIterator2 __last2, _Predicate __pred, /* __is_vector = */ std::true_type) noexcept
3575 {
3576     auto __n = std::min(__last1 - __first1, __last2 - __first2);
3577     return __unseq_backend::__simd_first(__first1, __n, __first2, __not_pred<_Predicate>(__pred));
3578 }
3579 
3580 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate, class _IsVector>
3581 std::pair<_ForwardIterator1, _ForwardIterator2>
3582 __pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3583                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred, _IsVector __is_vector,
3584                    /* is_parallel = */ std::false_type) noexcept
3585 {
3586     return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, __is_vector);
3587 }
3588 
3589 #if __PSTL_USE_PAR_POLICIES
3590 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Predicate,
3591           class _IsVector>
3592 std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
3593 __pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
3594                    _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred,
3595                    _IsVector __is_vector, /* is_parallel = */ std::true_type) noexcept
3596 {
3597     return __internal::__except_handler([&]() {
3598         auto __n = std::min(__last1 - __first1, __last2 - __first2);
3599         auto __result = __internal::__parallel_find(
3600             std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
3601             [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
3602                 return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), __pred,
3603                                         __is_vector)
3604                     .first;
3605             },
3606             std::less<typename std::iterator_traits<_RandomAccessIterator1>::difference_type>(), /*is_first=*/true);
3607         return std::make_pair(__result, __first2 + (__result - __first1));
3608     });
3609 }
3610 #endif
3611 
3612 //------------------------------------------------------------------------
3613 // lexicographical_compare
3614 //------------------------------------------------------------------------
3615 
3616 template <class _ForwardIterator1, class _ForwardIterator2, class _Compare>
3617 bool
3618 __brick_lexicographical_compare(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3619                                 _ForwardIterator2 __last2, _Compare __comp,
3620                                 /* __is_vector = */ std::false_type) noexcept
3621 {
3622     return std::lexicographical_compare(__first1, __last1, __first2, __last2, __comp);
3623 }
3624 
3625 template <class _ForwardIterator1, class _ForwardIterator2, class _Compare>
3626 bool
3627 __brick_lexicographical_compare(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
3628                                 _ForwardIterator2 __last2, _Compare __comp, /* __is_vector = */ std::true_type) noexcept
3629 {
3630     if (__first2 == __last2)
3631     { // if second sequence is empty
3632         return false;
3633     }
3634     else if (__first1 == __last1)
3635     { // if first sequence is empty
3636         return true;
3637     }
3638     else
3639     {
3640         typedef typename std::iterator_traits<_ForwardIterator1>::reference ref_type1;
3641         typedef typename std::iterator_traits<_ForwardIterator2>::reference ref_type2;
3642         --__last1;
3643         --__last2;
3644         auto __n = std::min(__last1 - __first1, __last2 - __first2);
3645         std::pair<_ForwardIterator1, _ForwardIterator2> __result = __unseq_backend::__simd_first(
3646             __first1, __n, __first2, [__comp](const ref_type1 __x, const ref_type2 __y) mutable {
3647                 return __comp(__x, __y) || __comp(__y, __x);
3648             });
3649 
3650         if (__result.first == __last1 && __result.second != __last2)
3651         { // if first sequence shorter than second
3652             return !__comp(*__result.second, *__result.first);
3653         }
3654         else
3655         { // if second sequence shorter than first or both have the same number of elements
3656             return __comp(*__result.first, *__result.second);
3657         }
3658     }
3659 }
3660 
3661 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare, class _IsVector>
3662 bool
3663 __pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3664                                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp,
3665                                   _IsVector __is_vector, /* is_parallel = */ std::false_type) noexcept
3666 {
3667     return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, __is_vector);
3668 }
3669 
3670 #if __PSTL_USE_PAR_POLICIES
3671 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare, class _IsVector>
3672 bool
3673 __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
3674                                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp,
3675                                   _IsVector __is_vector, /* is_parallel = */ std::true_type) noexcept
3676 {
3677     if (__first2 == __last2)
3678     { // if second sequence is empty
3679         return false;
3680     }
3681     else if (__first1 == __last1)
3682     { // if first sequence is empty
3683         return true;
3684     }
3685     else
3686     {
3687         typedef typename std::iterator_traits<_ForwardIterator1>::reference _RefType1;
3688         typedef typename std::iterator_traits<_ForwardIterator2>::reference _RefType2;
3689         --__last1;
3690         --__last2;
3691         auto __n = std::min(__last1 - __first1, __last2 - __first2);
3692         auto __result = __internal::__parallel_find(
3693             std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
3694             [__first1, __first2, &__comp, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) {
3695                 return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1),
3696                                         [&__comp](const _RefType1 __x, const _RefType2 __y) {
3697                                             return !__comp(__x, __y) && !__comp(__y, __x);
3698                                         },
3699                                         __is_vector)
3700                     .first;
3701             },
3702             std::less<typename std::iterator_traits<_ForwardIterator1>::difference_type>(), /*is_first=*/true);
3703 
3704         if (__result == __last1 && __first2 + (__result - __first1) != __last2)
3705         { // if first sequence shorter than second
3706             return !__comp(*(__first2 + (__result - __first1)), *__result);
3707         }
3708         else
3709         { // if second sequence shorter than first or both have the same number of elements
3710             return __comp(*__result, *(__first2 + (__result - __first1)));
3711         }
3712     }
3713 }
3714 #endif
3715 
3716 } // namespace __internal
3717 } // namespace __pstl
3718 
3719 #endif /* __PSTL_algorithm_impl_H */
3720