xref: /freebsd-src/contrib/llvm-project/libcxx/include/__pstl/backends/libdispatch.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
2*0fca6ea1SDimitry Andric //
3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0fca6ea1SDimitry Andric //
7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
8*0fca6ea1SDimitry Andric 
9*0fca6ea1SDimitry Andric #ifndef _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H
10*0fca6ea1SDimitry Andric #define _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H
11*0fca6ea1SDimitry Andric 
12*0fca6ea1SDimitry Andric #include <__algorithm/inplace_merge.h>
13*0fca6ea1SDimitry Andric #include <__algorithm/lower_bound.h>
14*0fca6ea1SDimitry Andric #include <__algorithm/max.h>
15*0fca6ea1SDimitry Andric #include <__algorithm/merge.h>
16*0fca6ea1SDimitry Andric #include <__algorithm/upper_bound.h>
17*0fca6ea1SDimitry Andric #include <__atomic/atomic.h>
18*0fca6ea1SDimitry Andric #include <__config>
19*0fca6ea1SDimitry Andric #include <__exception/terminate.h>
20*0fca6ea1SDimitry Andric #include <__iterator/iterator_traits.h>
21*0fca6ea1SDimitry Andric #include <__iterator/move_iterator.h>
22*0fca6ea1SDimitry Andric #include <__memory/allocator.h>
23*0fca6ea1SDimitry Andric #include <__memory/construct_at.h>
24*0fca6ea1SDimitry Andric #include <__memory/unique_ptr.h>
25*0fca6ea1SDimitry Andric #include <__numeric/reduce.h>
26*0fca6ea1SDimitry Andric #include <__pstl/backend_fwd.h>
27*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/any_of.h>
28*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/cpu_traits.h>
29*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/fill.h>
30*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/find_if.h>
31*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/for_each.h>
32*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/merge.h>
33*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/stable_sort.h>
34*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/transform.h>
35*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/transform_reduce.h>
36*0fca6ea1SDimitry Andric #include <__utility/empty.h>
37*0fca6ea1SDimitry Andric #include <__utility/exception_guard.h>
38*0fca6ea1SDimitry Andric #include <__utility/move.h>
39*0fca6ea1SDimitry Andric #include <__utility/pair.h>
40*0fca6ea1SDimitry Andric #include <cstddef>
41*0fca6ea1SDimitry Andric #include <new>
42*0fca6ea1SDimitry Andric #include <optional>
43*0fca6ea1SDimitry Andric 
44*0fca6ea1SDimitry Andric _LIBCPP_PUSH_MACROS
45*0fca6ea1SDimitry Andric #include <__undef_macros>
46*0fca6ea1SDimitry Andric 
47*0fca6ea1SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD
48*0fca6ea1SDimitry Andric namespace __pstl {
49*0fca6ea1SDimitry Andric 
50*0fca6ea1SDimitry Andric namespace __libdispatch {
51*0fca6ea1SDimitry Andric // ::dispatch_apply is marked as __attribute__((nothrow)) because it doesn't let exceptions propagate, and neither do
52*0fca6ea1SDimitry Andric // we.
53*0fca6ea1SDimitry Andric // TODO: Do we want to add [[_Clang::__callback__(__func, __context, __)]]?
54*0fca6ea1SDimitry Andric _LIBCPP_EXPORTED_FROM_ABI void
55*0fca6ea1SDimitry Andric __dispatch_apply(size_t __chunk_count, void* __context, void (*__func)(void* __context, size_t __chunk)) noexcept;
56*0fca6ea1SDimitry Andric 
57*0fca6ea1SDimitry Andric template <class _Func>
58*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __dispatch_apply(size_t __chunk_count, _Func __func) noexcept {
59*0fca6ea1SDimitry Andric   __libdispatch::__dispatch_apply(__chunk_count, &__func, [](void* __context, size_t __chunk) {
60*0fca6ea1SDimitry Andric     (*static_cast<_Func*>(__context))(__chunk);
61*0fca6ea1SDimitry Andric   });
62*0fca6ea1SDimitry Andric }
63*0fca6ea1SDimitry Andric 
64*0fca6ea1SDimitry Andric struct __chunk_partitions {
65*0fca6ea1SDimitry Andric   ptrdiff_t __chunk_count_; // includes the first chunk
66*0fca6ea1SDimitry Andric   ptrdiff_t __chunk_size_;
67*0fca6ea1SDimitry Andric   ptrdiff_t __first_chunk_size_;
68*0fca6ea1SDimitry Andric };
69*0fca6ea1SDimitry Andric 
70*0fca6ea1SDimitry Andric [[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
71*0fca6ea1SDimitry Andric 
72*0fca6ea1SDimitry Andric template <class _RandomAccessIterator, class _Functor>
73*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI optional<__empty>
74*0fca6ea1SDimitry Andric __dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator __first, _Functor __func) {
75*0fca6ea1SDimitry Andric   // Perform the chunked execution.
76*0fca6ea1SDimitry Andric   __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
77*0fca6ea1SDimitry Andric     auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
78*0fca6ea1SDimitry Andric     auto __index =
79*0fca6ea1SDimitry Andric         __chunk == 0
80*0fca6ea1SDimitry Andric             ? 0
81*0fca6ea1SDimitry Andric             : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
82*0fca6ea1SDimitry Andric     __func(__first + __index, __first + __index + __this_chunk_size);
83*0fca6ea1SDimitry Andric   });
84*0fca6ea1SDimitry Andric 
85*0fca6ea1SDimitry Andric   return __empty{};
86*0fca6ea1SDimitry Andric }
87*0fca6ea1SDimitry Andric } // namespace __libdispatch
88*0fca6ea1SDimitry Andric 
89*0fca6ea1SDimitry Andric template <>
90*0fca6ea1SDimitry Andric struct __cpu_traits<__libdispatch_backend_tag> {
91*0fca6ea1SDimitry Andric   template <class _RandomAccessIterator, class _Functor>
92*0fca6ea1SDimitry Andric   _LIBCPP_HIDE_FROM_ABI static optional<__empty>
93*0fca6ea1SDimitry Andric   __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
94*0fca6ea1SDimitry Andric     return __libdispatch::__dispatch_parallel_for(
95*0fca6ea1SDimitry Andric         __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
96*0fca6ea1SDimitry Andric   }
97*0fca6ea1SDimitry Andric 
98*0fca6ea1SDimitry Andric   template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
99*0fca6ea1SDimitry Andric   struct __merge_range {
100*0fca6ea1SDimitry Andric     __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
101*0fca6ea1SDimitry Andric         : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {}
102*0fca6ea1SDimitry Andric 
103*0fca6ea1SDimitry Andric     _RandomAccessIterator1 __mid1_;
104*0fca6ea1SDimitry Andric     _RandomAccessIterator2 __mid2_;
105*0fca6ea1SDimitry Andric     _RandomAccessIteratorOut __result_;
106*0fca6ea1SDimitry Andric   };
107*0fca6ea1SDimitry Andric 
108*0fca6ea1SDimitry Andric   template <typename _RandomAccessIterator1,
109*0fca6ea1SDimitry Andric             typename _RandomAccessIterator2,
110*0fca6ea1SDimitry Andric             typename _RandomAccessIterator3,
111*0fca6ea1SDimitry Andric             typename _Compare,
112*0fca6ea1SDimitry Andric             typename _LeafMerge>
113*0fca6ea1SDimitry Andric   _LIBCPP_HIDE_FROM_ABI static optional<__empty>
114*0fca6ea1SDimitry Andric   __merge(_RandomAccessIterator1 __first1,
115*0fca6ea1SDimitry Andric           _RandomAccessIterator1 __last1,
116*0fca6ea1SDimitry Andric           _RandomAccessIterator2 __first2,
117*0fca6ea1SDimitry Andric           _RandomAccessIterator2 __last2,
118*0fca6ea1SDimitry Andric           _RandomAccessIterator3 __result,
119*0fca6ea1SDimitry Andric           _Compare __comp,
120*0fca6ea1SDimitry Andric           _LeafMerge __leaf_merge) noexcept {
121*0fca6ea1SDimitry Andric     __libdispatch::__chunk_partitions __partitions =
122*0fca6ea1SDimitry Andric         __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2));
123*0fca6ea1SDimitry Andric 
124*0fca6ea1SDimitry Andric     if (__partitions.__chunk_count_ == 0)
125*0fca6ea1SDimitry Andric       return __empty{};
126*0fca6ea1SDimitry Andric 
127*0fca6ea1SDimitry Andric     if (__partitions.__chunk_count_ == 1) {
128*0fca6ea1SDimitry Andric       __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp);
129*0fca6ea1SDimitry Andric       return __empty{};
130*0fca6ea1SDimitry Andric     }
131*0fca6ea1SDimitry Andric 
132*0fca6ea1SDimitry Andric     using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
133*0fca6ea1SDimitry Andric     auto const __n_ranges = __partitions.__chunk_count_ + 1;
134*0fca6ea1SDimitry Andric 
135*0fca6ea1SDimitry Andric     // TODO: use __uninitialized_buffer
136*0fca6ea1SDimitry Andric     auto __destroy = [=](__merge_range_t* __ptr) {
137*0fca6ea1SDimitry Andric       std::destroy_n(__ptr, __n_ranges);
138*0fca6ea1SDimitry Andric       std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges);
139*0fca6ea1SDimitry Andric     };
140*0fca6ea1SDimitry Andric 
141*0fca6ea1SDimitry Andric     unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges(
142*0fca6ea1SDimitry Andric         [&]() -> __merge_range_t* {
143*0fca6ea1SDimitry Andric #ifndef _LIBCPP_HAS_NO_EXCEPTIONS
144*0fca6ea1SDimitry Andric           try {
145*0fca6ea1SDimitry Andric #endif
146*0fca6ea1SDimitry Andric             return std::allocator<__merge_range_t>().allocate(__n_ranges);
147*0fca6ea1SDimitry Andric #ifndef _LIBCPP_HAS_NO_EXCEPTIONS
148*0fca6ea1SDimitry Andric           } catch (const std::bad_alloc&) {
149*0fca6ea1SDimitry Andric             return nullptr;
150*0fca6ea1SDimitry Andric           }
151*0fca6ea1SDimitry Andric #endif
152*0fca6ea1SDimitry Andric         }(),
153*0fca6ea1SDimitry Andric         __destroy);
154*0fca6ea1SDimitry Andric 
155*0fca6ea1SDimitry Andric     if (!__ranges)
156*0fca6ea1SDimitry Andric       return nullopt;
157*0fca6ea1SDimitry Andric 
158*0fca6ea1SDimitry Andric     // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case
159*0fca6ea1SDimitry Andric     __merge_range_t* __r = __ranges.get();
160*0fca6ea1SDimitry Andric     std::__construct_at(__r++, __first1, __first2, __result);
161*0fca6ea1SDimitry Andric 
162*0fca6ea1SDimitry Andric     bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;
163*0fca6ea1SDimitry Andric 
164*0fca6ea1SDimitry Andric     auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t {
165*0fca6ea1SDimitry Andric       auto [__mid1, __mid2] = [&] {
166*0fca6ea1SDimitry Andric         if (__iterate_first_range) {
167*0fca6ea1SDimitry Andric           auto __m1 = __first1 + __chunk_size;
168*0fca6ea1SDimitry Andric           auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp);
169*0fca6ea1SDimitry Andric           return std::make_pair(__m1, __m2);
170*0fca6ea1SDimitry Andric         } else {
171*0fca6ea1SDimitry Andric           auto __m2 = __first2 + __chunk_size;
172*0fca6ea1SDimitry Andric           auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp);
173*0fca6ea1SDimitry Andric           return std::make_pair(__m1, __m2);
174*0fca6ea1SDimitry Andric         }
175*0fca6ea1SDimitry Andric       }();
176*0fca6ea1SDimitry Andric 
177*0fca6ea1SDimitry Andric       __result += (__mid1 - __first1) + (__mid2 - __first2);
178*0fca6ea1SDimitry Andric       __first1 = __mid1;
179*0fca6ea1SDimitry Andric       __first2 = __mid2;
180*0fca6ea1SDimitry Andric       return {std::move(__mid1), std::move(__mid2), __result};
181*0fca6ea1SDimitry Andric     };
182*0fca6ea1SDimitry Andric 
183*0fca6ea1SDimitry Andric     // handle first chunk
184*0fca6ea1SDimitry Andric     std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_));
185*0fca6ea1SDimitry Andric 
186*0fca6ea1SDimitry Andric     // handle 2 -> N - 1 chunks
187*0fca6ea1SDimitry Andric     for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i)
188*0fca6ea1SDimitry Andric       std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_));
189*0fca6ea1SDimitry Andric 
190*0fca6ea1SDimitry Andric     // handle last chunk
191*0fca6ea1SDimitry Andric     std::__construct_at(__r, __last1, __last2, __result);
192*0fca6ea1SDimitry Andric 
193*0fca6ea1SDimitry Andric     __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) {
194*0fca6ea1SDimitry Andric       auto __first_iters = __ranges[__index];
195*0fca6ea1SDimitry Andric       auto __last_iters  = __ranges[__index + 1];
196*0fca6ea1SDimitry Andric       __leaf_merge(
197*0fca6ea1SDimitry Andric           __first_iters.__mid1_,
198*0fca6ea1SDimitry Andric           __last_iters.__mid1_,
199*0fca6ea1SDimitry Andric           __first_iters.__mid2_,
200*0fca6ea1SDimitry Andric           __last_iters.__mid2_,
201*0fca6ea1SDimitry Andric           __first_iters.__result_,
202*0fca6ea1SDimitry Andric           __comp);
203*0fca6ea1SDimitry Andric     });
204*0fca6ea1SDimitry Andric 
205*0fca6ea1SDimitry Andric     return __empty{};
206*0fca6ea1SDimitry Andric   }
207*0fca6ea1SDimitry Andric 
208*0fca6ea1SDimitry Andric   template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction>
209*0fca6ea1SDimitry Andric   _LIBCPP_HIDE_FROM_ABI static optional<_Value> __transform_reduce(
210*0fca6ea1SDimitry Andric       _RandomAccessIterator __first,
211*0fca6ea1SDimitry Andric       _RandomAccessIterator __last,
212*0fca6ea1SDimitry Andric       _Transform __transform,
213*0fca6ea1SDimitry Andric       _Value __init,
214*0fca6ea1SDimitry Andric       _Combiner __combiner,
215*0fca6ea1SDimitry Andric       _Reduction __reduction) {
216*0fca6ea1SDimitry Andric     if (__first == __last)
217*0fca6ea1SDimitry Andric       return __init;
218*0fca6ea1SDimitry Andric 
219*0fca6ea1SDimitry Andric     auto __partitions = __libdispatch::__partition_chunks(__last - __first);
220*0fca6ea1SDimitry Andric 
221*0fca6ea1SDimitry Andric     auto __destroy = [__count = __partitions.__chunk_count_](_Value* __ptr) {
222*0fca6ea1SDimitry Andric       std::destroy_n(__ptr, __count);
223*0fca6ea1SDimitry Andric       std::allocator<_Value>().deallocate(__ptr, __count);
224*0fca6ea1SDimitry Andric     };
225*0fca6ea1SDimitry Andric 
226*0fca6ea1SDimitry Andric     // TODO: use __uninitialized_buffer
227*0fca6ea1SDimitry Andric     // TODO: allocate one element per worker instead of one element per chunk
228*0fca6ea1SDimitry Andric     unique_ptr<_Value[], decltype(__destroy)> __values(
229*0fca6ea1SDimitry Andric         std::allocator<_Value>().allocate(__partitions.__chunk_count_), __destroy);
230*0fca6ea1SDimitry Andric 
231*0fca6ea1SDimitry Andric     // __dispatch_apply is noexcept
232*0fca6ea1SDimitry Andric     __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
233*0fca6ea1SDimitry Andric       auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
234*0fca6ea1SDimitry Andric       auto __index           = __chunk == 0 ? 0
235*0fca6ea1SDimitry Andric                                             : (__chunk * __partitions.__chunk_size_) +
236*0fca6ea1SDimitry Andric                                         (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
237*0fca6ea1SDimitry Andric       if (__this_chunk_size != 1) {
238*0fca6ea1SDimitry Andric         std::__construct_at(
239*0fca6ea1SDimitry Andric             __values.get() + __chunk,
240*0fca6ea1SDimitry Andric             __reduction(__first + __index + 2,
241*0fca6ea1SDimitry Andric                         __first + __index + __this_chunk_size,
242*0fca6ea1SDimitry Andric                         __combiner(__transform(__first + __index), __transform(__first + __index + 1))));
243*0fca6ea1SDimitry Andric       } else {
244*0fca6ea1SDimitry Andric         std::__construct_at(__values.get() + __chunk, __transform(__first + __index));
245*0fca6ea1SDimitry Andric       }
246*0fca6ea1SDimitry Andric     });
247*0fca6ea1SDimitry Andric 
248*0fca6ea1SDimitry Andric     return std::reduce(
249*0fca6ea1SDimitry Andric         std::make_move_iterator(__values.get()),
250*0fca6ea1SDimitry Andric         std::make_move_iterator(__values.get() + __partitions.__chunk_count_),
251*0fca6ea1SDimitry Andric         std::move(__init),
252*0fca6ea1SDimitry Andric         __combiner);
253*0fca6ea1SDimitry Andric   }
254*0fca6ea1SDimitry Andric 
255*0fca6ea1SDimitry Andric   template <class _RandomAccessIterator, class _Comp, class _LeafSort>
256*0fca6ea1SDimitry Andric   _LIBCPP_HIDE_FROM_ABI static optional<__empty>
257*0fca6ea1SDimitry Andric   __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
258*0fca6ea1SDimitry Andric     const auto __size = __last - __first;
259*0fca6ea1SDimitry Andric     auto __partitions = __libdispatch::__partition_chunks(__size);
260*0fca6ea1SDimitry Andric 
261*0fca6ea1SDimitry Andric     if (__partitions.__chunk_count_ == 0)
262*0fca6ea1SDimitry Andric       return __empty{};
263*0fca6ea1SDimitry Andric 
264*0fca6ea1SDimitry Andric     if (__partitions.__chunk_count_ == 1) {
265*0fca6ea1SDimitry Andric       __leaf_sort(__first, __last, __comp);
266*0fca6ea1SDimitry Andric       return __empty{};
267*0fca6ea1SDimitry Andric     }
268*0fca6ea1SDimitry Andric 
269*0fca6ea1SDimitry Andric     using _Value = __iter_value_type<_RandomAccessIterator>;
270*0fca6ea1SDimitry Andric 
271*0fca6ea1SDimitry Andric     auto __destroy = [__size](_Value* __ptr) {
272*0fca6ea1SDimitry Andric       std::destroy_n(__ptr, __size);
273*0fca6ea1SDimitry Andric       std::allocator<_Value>().deallocate(__ptr, __size);
274*0fca6ea1SDimitry Andric     };
275*0fca6ea1SDimitry Andric 
276*0fca6ea1SDimitry Andric     // TODO: use __uninitialized_buffer
277*0fca6ea1SDimitry Andric     unique_ptr<_Value[], decltype(__destroy)> __values(std::allocator<_Value>().allocate(__size), __destroy);
278*0fca6ea1SDimitry Andric 
279*0fca6ea1SDimitry Andric     // Initialize all elements to a moved-from state
280*0fca6ea1SDimitry Andric     // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928
281*0fca6ea1SDimitry Andric     std::__construct_at(__values.get(), std::move(*__first));
282*0fca6ea1SDimitry Andric     for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) {
283*0fca6ea1SDimitry Andric       std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1]));
284*0fca6ea1SDimitry Andric     }
285*0fca6ea1SDimitry Andric     *__first = std::move(__values.get()[__size - 1]);
286*0fca6ea1SDimitry Andric 
287*0fca6ea1SDimitry Andric     __libdispatch::__dispatch_parallel_for(
288*0fca6ea1SDimitry Andric         __partitions,
289*0fca6ea1SDimitry Andric         __first,
290*0fca6ea1SDimitry Andric         [&__leaf_sort, &__comp](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last) {
291*0fca6ea1SDimitry Andric           __leaf_sort(std::move(__chunk_first), std::move(__chunk_last), __comp);
292*0fca6ea1SDimitry Andric         });
293*0fca6ea1SDimitry Andric 
294*0fca6ea1SDimitry Andric     bool __objects_are_in_buffer = false;
295*0fca6ea1SDimitry Andric     do {
296*0fca6ea1SDimitry Andric       const auto __old_chunk_size = __partitions.__chunk_size_;
297*0fca6ea1SDimitry Andric       if (__partitions.__chunk_count_ % 2 == 1) {
298*0fca6ea1SDimitry Andric         auto __inplace_merge_chunks = [&__comp, &__partitions](auto __first_chunk_begin) {
299*0fca6ea1SDimitry Andric           std::inplace_merge(
300*0fca6ea1SDimitry Andric               __first_chunk_begin,
301*0fca6ea1SDimitry Andric               __first_chunk_begin + __partitions.__first_chunk_size_,
302*0fca6ea1SDimitry Andric               __first_chunk_begin + __partitions.__first_chunk_size_ + __partitions.__chunk_size_,
303*0fca6ea1SDimitry Andric               __comp);
304*0fca6ea1SDimitry Andric         };
305*0fca6ea1SDimitry Andric         if (__objects_are_in_buffer)
306*0fca6ea1SDimitry Andric           __inplace_merge_chunks(__values.get());
307*0fca6ea1SDimitry Andric         else
308*0fca6ea1SDimitry Andric           __inplace_merge_chunks(__first);
309*0fca6ea1SDimitry Andric         __partitions.__first_chunk_size_ += 2 * __partitions.__chunk_size_;
310*0fca6ea1SDimitry Andric       } else {
311*0fca6ea1SDimitry Andric         __partitions.__first_chunk_size_ += __partitions.__chunk_size_;
312*0fca6ea1SDimitry Andric       }
313*0fca6ea1SDimitry Andric 
314*0fca6ea1SDimitry Andric       __partitions.__chunk_size_ *= 2;
315*0fca6ea1SDimitry Andric       __partitions.__chunk_count_ /= 2;
316*0fca6ea1SDimitry Andric 
317*0fca6ea1SDimitry Andric       auto __merge_chunks = [__partitions, __old_chunk_size, &__comp](auto __from_first, auto __to_first) {
318*0fca6ea1SDimitry Andric         __libdispatch::__dispatch_parallel_for(
319*0fca6ea1SDimitry Andric             __partitions,
320*0fca6ea1SDimitry Andric             __from_first,
321*0fca6ea1SDimitry Andric             [__old_chunk_size, &__from_first, &__to_first, &__comp](auto __chunk_first, auto __chunk_last) {
322*0fca6ea1SDimitry Andric               std::merge(std::make_move_iterator(__chunk_first),
323*0fca6ea1SDimitry Andric                          std::make_move_iterator(__chunk_last - __old_chunk_size),
324*0fca6ea1SDimitry Andric                          std::make_move_iterator(__chunk_last - __old_chunk_size),
325*0fca6ea1SDimitry Andric                          std::make_move_iterator(__chunk_last),
326*0fca6ea1SDimitry Andric                          __to_first + (__chunk_first - __from_first),
327*0fca6ea1SDimitry Andric                          __comp);
328*0fca6ea1SDimitry Andric             });
329*0fca6ea1SDimitry Andric       };
330*0fca6ea1SDimitry Andric 
331*0fca6ea1SDimitry Andric       if (__objects_are_in_buffer)
332*0fca6ea1SDimitry Andric         __merge_chunks(__values.get(), __first);
333*0fca6ea1SDimitry Andric       else
334*0fca6ea1SDimitry Andric         __merge_chunks(__first, __values.get());
335*0fca6ea1SDimitry Andric       __objects_are_in_buffer = !__objects_are_in_buffer;
336*0fca6ea1SDimitry Andric     } while (__partitions.__chunk_count_ > 1);
337*0fca6ea1SDimitry Andric 
338*0fca6ea1SDimitry Andric     if (__objects_are_in_buffer) {
339*0fca6ea1SDimitry Andric       std::move(__values.get(), __values.get() + __size, __first);
340*0fca6ea1SDimitry Andric     }
341*0fca6ea1SDimitry Andric 
342*0fca6ea1SDimitry Andric     return __empty{};
343*0fca6ea1SDimitry Andric   }
344*0fca6ea1SDimitry Andric 
345*0fca6ea1SDimitry Andric   _LIBCPP_HIDE_FROM_ABI static void __cancel_execution() {}
346*0fca6ea1SDimitry Andric 
347*0fca6ea1SDimitry Andric   static constexpr size_t __lane_size = 64;
348*0fca6ea1SDimitry Andric };
349*0fca6ea1SDimitry Andric 
350*0fca6ea1SDimitry Andric // Mandatory implementations of the computational basis
351*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
352*0fca6ea1SDimitry Andric struct __find_if<__libdispatch_backend_tag, _ExecutionPolicy>
353*0fca6ea1SDimitry Andric     : __cpu_parallel_find_if<__libdispatch_backend_tag, _ExecutionPolicy> {};
354*0fca6ea1SDimitry Andric 
355*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
356*0fca6ea1SDimitry Andric struct __for_each<__libdispatch_backend_tag, _ExecutionPolicy>
357*0fca6ea1SDimitry Andric     : __cpu_parallel_for_each<__libdispatch_backend_tag, _ExecutionPolicy> {};
358*0fca6ea1SDimitry Andric 
359*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
360*0fca6ea1SDimitry Andric struct __merge<__libdispatch_backend_tag, _ExecutionPolicy>
361*0fca6ea1SDimitry Andric     : __cpu_parallel_merge<__libdispatch_backend_tag, _ExecutionPolicy> {};
362*0fca6ea1SDimitry Andric 
363*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
364*0fca6ea1SDimitry Andric struct __stable_sort<__libdispatch_backend_tag, _ExecutionPolicy>
365*0fca6ea1SDimitry Andric     : __cpu_parallel_stable_sort<__libdispatch_backend_tag, _ExecutionPolicy> {};
366*0fca6ea1SDimitry Andric 
367*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
368*0fca6ea1SDimitry Andric struct __transform<__libdispatch_backend_tag, _ExecutionPolicy>
369*0fca6ea1SDimitry Andric     : __cpu_parallel_transform<__libdispatch_backend_tag, _ExecutionPolicy> {};
370*0fca6ea1SDimitry Andric 
371*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
372*0fca6ea1SDimitry Andric struct __transform_binary<__libdispatch_backend_tag, _ExecutionPolicy>
373*0fca6ea1SDimitry Andric     : __cpu_parallel_transform_binary<__libdispatch_backend_tag, _ExecutionPolicy> {};
374*0fca6ea1SDimitry Andric 
375*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
376*0fca6ea1SDimitry Andric struct __transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy>
377*0fca6ea1SDimitry Andric     : __cpu_parallel_transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy> {};
378*0fca6ea1SDimitry Andric 
379*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
380*0fca6ea1SDimitry Andric struct __transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy>
381*0fca6ea1SDimitry Andric     : __cpu_parallel_transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy> {};
382*0fca6ea1SDimitry Andric 
383*0fca6ea1SDimitry Andric // Not mandatory, but better optimized
384*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
385*0fca6ea1SDimitry Andric struct __any_of<__libdispatch_backend_tag, _ExecutionPolicy>
386*0fca6ea1SDimitry Andric     : __cpu_parallel_any_of<__libdispatch_backend_tag, _ExecutionPolicy> {};
387*0fca6ea1SDimitry Andric 
388*0fca6ea1SDimitry Andric template <class _ExecutionPolicy>
389*0fca6ea1SDimitry Andric struct __fill<__libdispatch_backend_tag, _ExecutionPolicy>
390*0fca6ea1SDimitry Andric     : __cpu_parallel_fill<__libdispatch_backend_tag, _ExecutionPolicy> {};
391*0fca6ea1SDimitry Andric 
392*0fca6ea1SDimitry Andric } // namespace __pstl
393*0fca6ea1SDimitry Andric _LIBCPP_END_NAMESPACE_STD
394*0fca6ea1SDimitry Andric 
395*0fca6ea1SDimitry Andric _LIBCPP_POP_MACROS
396*0fca6ea1SDimitry Andric 
397*0fca6ea1SDimitry Andric #endif // _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H
398