1*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 2*0fca6ea1SDimitry Andric // 3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0fca6ea1SDimitry Andric // 7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 8*0fca6ea1SDimitry Andric 9*0fca6ea1SDimitry Andric #ifndef _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H 10*0fca6ea1SDimitry Andric #define _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H 11*0fca6ea1SDimitry Andric 12*0fca6ea1SDimitry Andric #include <__algorithm/inplace_merge.h> 13*0fca6ea1SDimitry Andric #include <__algorithm/lower_bound.h> 14*0fca6ea1SDimitry Andric #include <__algorithm/max.h> 15*0fca6ea1SDimitry Andric #include <__algorithm/merge.h> 16*0fca6ea1SDimitry Andric #include <__algorithm/upper_bound.h> 17*0fca6ea1SDimitry Andric #include <__atomic/atomic.h> 18*0fca6ea1SDimitry Andric #include <__config> 19*0fca6ea1SDimitry Andric #include <__exception/terminate.h> 20*0fca6ea1SDimitry Andric #include <__iterator/iterator_traits.h> 21*0fca6ea1SDimitry Andric #include <__iterator/move_iterator.h> 22*0fca6ea1SDimitry Andric #include <__memory/allocator.h> 23*0fca6ea1SDimitry Andric #include <__memory/construct_at.h> 24*0fca6ea1SDimitry Andric #include <__memory/unique_ptr.h> 25*0fca6ea1SDimitry Andric #include <__numeric/reduce.h> 26*0fca6ea1SDimitry Andric #include <__pstl/backend_fwd.h> 27*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/any_of.h> 28*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/cpu_traits.h> 29*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/fill.h> 30*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/find_if.h> 31*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/for_each.h> 32*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/merge.h> 33*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/stable_sort.h> 34*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/transform.h> 35*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/transform_reduce.h> 36*0fca6ea1SDimitry Andric #include <__utility/empty.h> 37*0fca6ea1SDimitry Andric #include <__utility/exception_guard.h> 38*0fca6ea1SDimitry Andric #include <__utility/move.h> 39*0fca6ea1SDimitry Andric #include <__utility/pair.h> 40*0fca6ea1SDimitry Andric #include <cstddef> 41*0fca6ea1SDimitry Andric #include <new> 42*0fca6ea1SDimitry Andric #include <optional> 43*0fca6ea1SDimitry Andric 44*0fca6ea1SDimitry Andric _LIBCPP_PUSH_MACROS 45*0fca6ea1SDimitry Andric #include <__undef_macros> 46*0fca6ea1SDimitry Andric 47*0fca6ea1SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD 48*0fca6ea1SDimitry Andric namespace __pstl { 49*0fca6ea1SDimitry Andric 50*0fca6ea1SDimitry Andric namespace __libdispatch { 51*0fca6ea1SDimitry Andric // ::dispatch_apply is marked as __attribute__((nothrow)) because it doesn't let exceptions propagate, and neither do 52*0fca6ea1SDimitry Andric // we. 53*0fca6ea1SDimitry Andric // TODO: Do we want to add [[_Clang::__callback__(__func, __context, __)]]? 54*0fca6ea1SDimitry Andric _LIBCPP_EXPORTED_FROM_ABI void 55*0fca6ea1SDimitry Andric __dispatch_apply(size_t __chunk_count, void* __context, void (*__func)(void* __context, size_t __chunk)) noexcept; 56*0fca6ea1SDimitry Andric 57*0fca6ea1SDimitry Andric template <class _Func> 58*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __dispatch_apply(size_t __chunk_count, _Func __func) noexcept { 59*0fca6ea1SDimitry Andric __libdispatch::__dispatch_apply(__chunk_count, &__func, [](void* __context, size_t __chunk) { 60*0fca6ea1SDimitry Andric (*static_cast<_Func*>(__context))(__chunk); 61*0fca6ea1SDimitry Andric }); 62*0fca6ea1SDimitry Andric } 63*0fca6ea1SDimitry Andric 64*0fca6ea1SDimitry Andric struct __chunk_partitions { 65*0fca6ea1SDimitry Andric ptrdiff_t __chunk_count_; // includes the first chunk 66*0fca6ea1SDimitry Andric ptrdiff_t __chunk_size_; 67*0fca6ea1SDimitry Andric ptrdiff_t __first_chunk_size_; 68*0fca6ea1SDimitry Andric }; 69*0fca6ea1SDimitry Andric 70*0fca6ea1SDimitry Andric [[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept; 71*0fca6ea1SDimitry Andric 72*0fca6ea1SDimitry Andric template <class _RandomAccessIterator, class _Functor> 73*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI optional<__empty> 74*0fca6ea1SDimitry Andric __dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator __first, _Functor __func) { 75*0fca6ea1SDimitry Andric // Perform the chunked execution. 76*0fca6ea1SDimitry Andric __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) { 77*0fca6ea1SDimitry Andric auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_; 78*0fca6ea1SDimitry Andric auto __index = 79*0fca6ea1SDimitry Andric __chunk == 0 80*0fca6ea1SDimitry Andric ? 0 81*0fca6ea1SDimitry Andric : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_); 82*0fca6ea1SDimitry Andric __func(__first + __index, __first + __index + __this_chunk_size); 83*0fca6ea1SDimitry Andric }); 84*0fca6ea1SDimitry Andric 85*0fca6ea1SDimitry Andric return __empty{}; 86*0fca6ea1SDimitry Andric } 87*0fca6ea1SDimitry Andric } // namespace __libdispatch 88*0fca6ea1SDimitry Andric 89*0fca6ea1SDimitry Andric template <> 90*0fca6ea1SDimitry Andric struct __cpu_traits<__libdispatch_backend_tag> { 91*0fca6ea1SDimitry Andric template <class _RandomAccessIterator, class _Functor> 92*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI static optional<__empty> 93*0fca6ea1SDimitry Andric __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) { 94*0fca6ea1SDimitry Andric return __libdispatch::__dispatch_parallel_for( 95*0fca6ea1SDimitry Andric __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func)); 96*0fca6ea1SDimitry Andric } 97*0fca6ea1SDimitry Andric 98*0fca6ea1SDimitry Andric template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut> 99*0fca6ea1SDimitry Andric struct __merge_range { 100*0fca6ea1SDimitry Andric __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result) 101*0fca6ea1SDimitry Andric : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {} 102*0fca6ea1SDimitry Andric 103*0fca6ea1SDimitry Andric _RandomAccessIterator1 __mid1_; 104*0fca6ea1SDimitry Andric _RandomAccessIterator2 __mid2_; 105*0fca6ea1SDimitry Andric _RandomAccessIteratorOut __result_; 106*0fca6ea1SDimitry Andric }; 107*0fca6ea1SDimitry Andric 108*0fca6ea1SDimitry Andric template <typename _RandomAccessIterator1, 109*0fca6ea1SDimitry Andric typename _RandomAccessIterator2, 110*0fca6ea1SDimitry Andric typename _RandomAccessIterator3, 111*0fca6ea1SDimitry Andric typename _Compare, 112*0fca6ea1SDimitry Andric typename _LeafMerge> 113*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI static optional<__empty> 114*0fca6ea1SDimitry Andric __merge(_RandomAccessIterator1 __first1, 115*0fca6ea1SDimitry Andric _RandomAccessIterator1 __last1, 116*0fca6ea1SDimitry Andric _RandomAccessIterator2 __first2, 117*0fca6ea1SDimitry Andric _RandomAccessIterator2 __last2, 118*0fca6ea1SDimitry Andric _RandomAccessIterator3 __result, 119*0fca6ea1SDimitry Andric _Compare __comp, 120*0fca6ea1SDimitry Andric _LeafMerge __leaf_merge) noexcept { 121*0fca6ea1SDimitry Andric __libdispatch::__chunk_partitions __partitions = 122*0fca6ea1SDimitry Andric __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2)); 123*0fca6ea1SDimitry Andric 124*0fca6ea1SDimitry Andric if (__partitions.__chunk_count_ == 0) 125*0fca6ea1SDimitry Andric return __empty{}; 126*0fca6ea1SDimitry Andric 127*0fca6ea1SDimitry Andric if (__partitions.__chunk_count_ == 1) { 128*0fca6ea1SDimitry Andric __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp); 129*0fca6ea1SDimitry Andric return __empty{}; 130*0fca6ea1SDimitry Andric } 131*0fca6ea1SDimitry Andric 132*0fca6ea1SDimitry Andric using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>; 133*0fca6ea1SDimitry Andric auto const __n_ranges = __partitions.__chunk_count_ + 1; 134*0fca6ea1SDimitry Andric 135*0fca6ea1SDimitry Andric // TODO: use __uninitialized_buffer 136*0fca6ea1SDimitry Andric auto __destroy = [=](__merge_range_t* __ptr) { 137*0fca6ea1SDimitry Andric std::destroy_n(__ptr, __n_ranges); 138*0fca6ea1SDimitry Andric std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges); 139*0fca6ea1SDimitry Andric }; 140*0fca6ea1SDimitry Andric 141*0fca6ea1SDimitry Andric unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges( 142*0fca6ea1SDimitry Andric [&]() -> __merge_range_t* { 143*0fca6ea1SDimitry Andric #ifndef _LIBCPP_HAS_NO_EXCEPTIONS 144*0fca6ea1SDimitry Andric try { 145*0fca6ea1SDimitry Andric #endif 146*0fca6ea1SDimitry Andric return std::allocator<__merge_range_t>().allocate(__n_ranges); 147*0fca6ea1SDimitry Andric #ifndef _LIBCPP_HAS_NO_EXCEPTIONS 148*0fca6ea1SDimitry Andric } catch (const std::bad_alloc&) { 149*0fca6ea1SDimitry Andric return nullptr; 150*0fca6ea1SDimitry Andric } 151*0fca6ea1SDimitry Andric #endif 152*0fca6ea1SDimitry Andric }(), 153*0fca6ea1SDimitry Andric __destroy); 154*0fca6ea1SDimitry Andric 155*0fca6ea1SDimitry Andric if (!__ranges) 156*0fca6ea1SDimitry Andric return nullopt; 157*0fca6ea1SDimitry Andric 158*0fca6ea1SDimitry Andric // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case 159*0fca6ea1SDimitry Andric __merge_range_t* __r = __ranges.get(); 160*0fca6ea1SDimitry Andric std::__construct_at(__r++, __first1, __first2, __result); 161*0fca6ea1SDimitry Andric 162*0fca6ea1SDimitry Andric bool __iterate_first_range = __last1 - __first1 > __last2 - __first2; 163*0fca6ea1SDimitry Andric 164*0fca6ea1SDimitry Andric auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t { 165*0fca6ea1SDimitry Andric auto [__mid1, __mid2] = [&] { 166*0fca6ea1SDimitry Andric if (__iterate_first_range) { 167*0fca6ea1SDimitry Andric auto __m1 = __first1 + __chunk_size; 168*0fca6ea1SDimitry Andric auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp); 169*0fca6ea1SDimitry Andric return std::make_pair(__m1, __m2); 170*0fca6ea1SDimitry Andric } else { 171*0fca6ea1SDimitry Andric auto __m2 = __first2 + __chunk_size; 172*0fca6ea1SDimitry Andric auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp); 173*0fca6ea1SDimitry Andric return std::make_pair(__m1, __m2); 174*0fca6ea1SDimitry Andric } 175*0fca6ea1SDimitry Andric }(); 176*0fca6ea1SDimitry Andric 177*0fca6ea1SDimitry Andric __result += (__mid1 - __first1) + (__mid2 - __first2); 178*0fca6ea1SDimitry Andric __first1 = __mid1; 179*0fca6ea1SDimitry Andric __first2 = __mid2; 180*0fca6ea1SDimitry Andric return {std::move(__mid1), std::move(__mid2), __result}; 181*0fca6ea1SDimitry Andric }; 182*0fca6ea1SDimitry Andric 183*0fca6ea1SDimitry Andric // handle first chunk 184*0fca6ea1SDimitry Andric std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_)); 185*0fca6ea1SDimitry Andric 186*0fca6ea1SDimitry Andric // handle 2 -> N - 1 chunks 187*0fca6ea1SDimitry Andric for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i) 188*0fca6ea1SDimitry Andric std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_)); 189*0fca6ea1SDimitry Andric 190*0fca6ea1SDimitry Andric // handle last chunk 191*0fca6ea1SDimitry Andric std::__construct_at(__r, __last1, __last2, __result); 192*0fca6ea1SDimitry Andric 193*0fca6ea1SDimitry Andric __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) { 194*0fca6ea1SDimitry Andric auto __first_iters = __ranges[__index]; 195*0fca6ea1SDimitry Andric auto __last_iters = __ranges[__index + 1]; 196*0fca6ea1SDimitry Andric __leaf_merge( 197*0fca6ea1SDimitry Andric __first_iters.__mid1_, 198*0fca6ea1SDimitry Andric __last_iters.__mid1_, 199*0fca6ea1SDimitry Andric __first_iters.__mid2_, 200*0fca6ea1SDimitry Andric __last_iters.__mid2_, 201*0fca6ea1SDimitry Andric __first_iters.__result_, 202*0fca6ea1SDimitry Andric __comp); 203*0fca6ea1SDimitry Andric }); 204*0fca6ea1SDimitry Andric 205*0fca6ea1SDimitry Andric return __empty{}; 206*0fca6ea1SDimitry Andric } 207*0fca6ea1SDimitry Andric 208*0fca6ea1SDimitry Andric template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction> 209*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI static optional<_Value> __transform_reduce( 210*0fca6ea1SDimitry Andric _RandomAccessIterator __first, 211*0fca6ea1SDimitry Andric _RandomAccessIterator __last, 212*0fca6ea1SDimitry Andric _Transform __transform, 213*0fca6ea1SDimitry Andric _Value __init, 214*0fca6ea1SDimitry Andric _Combiner __combiner, 215*0fca6ea1SDimitry Andric _Reduction __reduction) { 216*0fca6ea1SDimitry Andric if (__first == __last) 217*0fca6ea1SDimitry Andric return __init; 218*0fca6ea1SDimitry Andric 219*0fca6ea1SDimitry Andric auto __partitions = __libdispatch::__partition_chunks(__last - __first); 220*0fca6ea1SDimitry Andric 221*0fca6ea1SDimitry Andric auto __destroy = [__count = __partitions.__chunk_count_](_Value* __ptr) { 222*0fca6ea1SDimitry Andric std::destroy_n(__ptr, __count); 223*0fca6ea1SDimitry Andric std::allocator<_Value>().deallocate(__ptr, __count); 224*0fca6ea1SDimitry Andric }; 225*0fca6ea1SDimitry Andric 226*0fca6ea1SDimitry Andric // TODO: use __uninitialized_buffer 227*0fca6ea1SDimitry Andric // TODO: allocate one element per worker instead of one element per chunk 228*0fca6ea1SDimitry Andric unique_ptr<_Value[], decltype(__destroy)> __values( 229*0fca6ea1SDimitry Andric std::allocator<_Value>().allocate(__partitions.__chunk_count_), __destroy); 230*0fca6ea1SDimitry Andric 231*0fca6ea1SDimitry Andric // __dispatch_apply is noexcept 232*0fca6ea1SDimitry Andric __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) { 233*0fca6ea1SDimitry Andric auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_; 234*0fca6ea1SDimitry Andric auto __index = __chunk == 0 ? 0 235*0fca6ea1SDimitry Andric : (__chunk * __partitions.__chunk_size_) + 236*0fca6ea1SDimitry Andric (__partitions.__first_chunk_size_ - __partitions.__chunk_size_); 237*0fca6ea1SDimitry Andric if (__this_chunk_size != 1) { 238*0fca6ea1SDimitry Andric std::__construct_at( 239*0fca6ea1SDimitry Andric __values.get() + __chunk, 240*0fca6ea1SDimitry Andric __reduction(__first + __index + 2, 241*0fca6ea1SDimitry Andric __first + __index + __this_chunk_size, 242*0fca6ea1SDimitry Andric __combiner(__transform(__first + __index), __transform(__first + __index + 1)))); 243*0fca6ea1SDimitry Andric } else { 244*0fca6ea1SDimitry Andric std::__construct_at(__values.get() + __chunk, __transform(__first + __index)); 245*0fca6ea1SDimitry Andric } 246*0fca6ea1SDimitry Andric }); 247*0fca6ea1SDimitry Andric 248*0fca6ea1SDimitry Andric return std::reduce( 249*0fca6ea1SDimitry Andric std::make_move_iterator(__values.get()), 250*0fca6ea1SDimitry Andric std::make_move_iterator(__values.get() + __partitions.__chunk_count_), 251*0fca6ea1SDimitry Andric std::move(__init), 252*0fca6ea1SDimitry Andric __combiner); 253*0fca6ea1SDimitry Andric } 254*0fca6ea1SDimitry Andric 255*0fca6ea1SDimitry Andric template <class _RandomAccessIterator, class _Comp, class _LeafSort> 256*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI static optional<__empty> 257*0fca6ea1SDimitry Andric __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) { 258*0fca6ea1SDimitry Andric const auto __size = __last - __first; 259*0fca6ea1SDimitry Andric auto __partitions = __libdispatch::__partition_chunks(__size); 260*0fca6ea1SDimitry Andric 261*0fca6ea1SDimitry Andric if (__partitions.__chunk_count_ == 0) 262*0fca6ea1SDimitry Andric return __empty{}; 263*0fca6ea1SDimitry Andric 264*0fca6ea1SDimitry Andric if (__partitions.__chunk_count_ == 1) { 265*0fca6ea1SDimitry Andric __leaf_sort(__first, __last, __comp); 266*0fca6ea1SDimitry Andric return __empty{}; 267*0fca6ea1SDimitry Andric } 268*0fca6ea1SDimitry Andric 269*0fca6ea1SDimitry Andric using _Value = __iter_value_type<_RandomAccessIterator>; 270*0fca6ea1SDimitry Andric 271*0fca6ea1SDimitry Andric auto __destroy = [__size](_Value* __ptr) { 272*0fca6ea1SDimitry Andric std::destroy_n(__ptr, __size); 273*0fca6ea1SDimitry Andric std::allocator<_Value>().deallocate(__ptr, __size); 274*0fca6ea1SDimitry Andric }; 275*0fca6ea1SDimitry Andric 276*0fca6ea1SDimitry Andric // TODO: use __uninitialized_buffer 277*0fca6ea1SDimitry Andric unique_ptr<_Value[], decltype(__destroy)> __values(std::allocator<_Value>().allocate(__size), __destroy); 278*0fca6ea1SDimitry Andric 279*0fca6ea1SDimitry Andric // Initialize all elements to a moved-from state 280*0fca6ea1SDimitry Andric // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928 281*0fca6ea1SDimitry Andric std::__construct_at(__values.get(), std::move(*__first)); 282*0fca6ea1SDimitry Andric for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) { 283*0fca6ea1SDimitry Andric std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1])); 284*0fca6ea1SDimitry Andric } 285*0fca6ea1SDimitry Andric *__first = std::move(__values.get()[__size - 1]); 286*0fca6ea1SDimitry Andric 287*0fca6ea1SDimitry Andric __libdispatch::__dispatch_parallel_for( 288*0fca6ea1SDimitry Andric __partitions, 289*0fca6ea1SDimitry Andric __first, 290*0fca6ea1SDimitry Andric [&__leaf_sort, &__comp](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last) { 291*0fca6ea1SDimitry Andric __leaf_sort(std::move(__chunk_first), std::move(__chunk_last), __comp); 292*0fca6ea1SDimitry Andric }); 293*0fca6ea1SDimitry Andric 294*0fca6ea1SDimitry Andric bool __objects_are_in_buffer = false; 295*0fca6ea1SDimitry Andric do { 296*0fca6ea1SDimitry Andric const auto __old_chunk_size = __partitions.__chunk_size_; 297*0fca6ea1SDimitry Andric if (__partitions.__chunk_count_ % 2 == 1) { 298*0fca6ea1SDimitry Andric auto __inplace_merge_chunks = [&__comp, &__partitions](auto __first_chunk_begin) { 299*0fca6ea1SDimitry Andric std::inplace_merge( 300*0fca6ea1SDimitry Andric __first_chunk_begin, 301*0fca6ea1SDimitry Andric __first_chunk_begin + __partitions.__first_chunk_size_, 302*0fca6ea1SDimitry Andric __first_chunk_begin + __partitions.__first_chunk_size_ + __partitions.__chunk_size_, 303*0fca6ea1SDimitry Andric __comp); 304*0fca6ea1SDimitry Andric }; 305*0fca6ea1SDimitry Andric if (__objects_are_in_buffer) 306*0fca6ea1SDimitry Andric __inplace_merge_chunks(__values.get()); 307*0fca6ea1SDimitry Andric else 308*0fca6ea1SDimitry Andric __inplace_merge_chunks(__first); 309*0fca6ea1SDimitry Andric __partitions.__first_chunk_size_ += 2 * __partitions.__chunk_size_; 310*0fca6ea1SDimitry Andric } else { 311*0fca6ea1SDimitry Andric __partitions.__first_chunk_size_ += __partitions.__chunk_size_; 312*0fca6ea1SDimitry Andric } 313*0fca6ea1SDimitry Andric 314*0fca6ea1SDimitry Andric __partitions.__chunk_size_ *= 2; 315*0fca6ea1SDimitry Andric __partitions.__chunk_count_ /= 2; 316*0fca6ea1SDimitry Andric 317*0fca6ea1SDimitry Andric auto __merge_chunks = [__partitions, __old_chunk_size, &__comp](auto __from_first, auto __to_first) { 318*0fca6ea1SDimitry Andric __libdispatch::__dispatch_parallel_for( 319*0fca6ea1SDimitry Andric __partitions, 320*0fca6ea1SDimitry Andric __from_first, 321*0fca6ea1SDimitry Andric [__old_chunk_size, &__from_first, &__to_first, &__comp](auto __chunk_first, auto __chunk_last) { 322*0fca6ea1SDimitry Andric std::merge(std::make_move_iterator(__chunk_first), 323*0fca6ea1SDimitry Andric std::make_move_iterator(__chunk_last - __old_chunk_size), 324*0fca6ea1SDimitry Andric std::make_move_iterator(__chunk_last - __old_chunk_size), 325*0fca6ea1SDimitry Andric std::make_move_iterator(__chunk_last), 326*0fca6ea1SDimitry Andric __to_first + (__chunk_first - __from_first), 327*0fca6ea1SDimitry Andric __comp); 328*0fca6ea1SDimitry Andric }); 329*0fca6ea1SDimitry Andric }; 330*0fca6ea1SDimitry Andric 331*0fca6ea1SDimitry Andric if (__objects_are_in_buffer) 332*0fca6ea1SDimitry Andric __merge_chunks(__values.get(), __first); 333*0fca6ea1SDimitry Andric else 334*0fca6ea1SDimitry Andric __merge_chunks(__first, __values.get()); 335*0fca6ea1SDimitry Andric __objects_are_in_buffer = !__objects_are_in_buffer; 336*0fca6ea1SDimitry Andric } while (__partitions.__chunk_count_ > 1); 337*0fca6ea1SDimitry Andric 338*0fca6ea1SDimitry Andric if (__objects_are_in_buffer) { 339*0fca6ea1SDimitry Andric std::move(__values.get(), __values.get() + __size, __first); 340*0fca6ea1SDimitry Andric } 341*0fca6ea1SDimitry Andric 342*0fca6ea1SDimitry Andric return __empty{}; 343*0fca6ea1SDimitry Andric } 344*0fca6ea1SDimitry Andric 345*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI static void __cancel_execution() {} 346*0fca6ea1SDimitry Andric 347*0fca6ea1SDimitry Andric static constexpr size_t __lane_size = 64; 348*0fca6ea1SDimitry Andric }; 349*0fca6ea1SDimitry Andric 350*0fca6ea1SDimitry Andric // Mandatory implementations of the computational basis 351*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 352*0fca6ea1SDimitry Andric struct __find_if<__libdispatch_backend_tag, _ExecutionPolicy> 353*0fca6ea1SDimitry Andric : __cpu_parallel_find_if<__libdispatch_backend_tag, _ExecutionPolicy> {}; 354*0fca6ea1SDimitry Andric 355*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 356*0fca6ea1SDimitry Andric struct __for_each<__libdispatch_backend_tag, _ExecutionPolicy> 357*0fca6ea1SDimitry Andric : __cpu_parallel_for_each<__libdispatch_backend_tag, _ExecutionPolicy> {}; 358*0fca6ea1SDimitry Andric 359*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 360*0fca6ea1SDimitry Andric struct __merge<__libdispatch_backend_tag, _ExecutionPolicy> 361*0fca6ea1SDimitry Andric : __cpu_parallel_merge<__libdispatch_backend_tag, _ExecutionPolicy> {}; 362*0fca6ea1SDimitry Andric 363*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 364*0fca6ea1SDimitry Andric struct __stable_sort<__libdispatch_backend_tag, _ExecutionPolicy> 365*0fca6ea1SDimitry Andric : __cpu_parallel_stable_sort<__libdispatch_backend_tag, _ExecutionPolicy> {}; 366*0fca6ea1SDimitry Andric 367*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 368*0fca6ea1SDimitry Andric struct __transform<__libdispatch_backend_tag, _ExecutionPolicy> 369*0fca6ea1SDimitry Andric : __cpu_parallel_transform<__libdispatch_backend_tag, _ExecutionPolicy> {}; 370*0fca6ea1SDimitry Andric 371*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 372*0fca6ea1SDimitry Andric struct __transform_binary<__libdispatch_backend_tag, _ExecutionPolicy> 373*0fca6ea1SDimitry Andric : __cpu_parallel_transform_binary<__libdispatch_backend_tag, _ExecutionPolicy> {}; 374*0fca6ea1SDimitry Andric 375*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 376*0fca6ea1SDimitry Andric struct __transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy> 377*0fca6ea1SDimitry Andric : __cpu_parallel_transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy> {}; 378*0fca6ea1SDimitry Andric 379*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 380*0fca6ea1SDimitry Andric struct __transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy> 381*0fca6ea1SDimitry Andric : __cpu_parallel_transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy> {}; 382*0fca6ea1SDimitry Andric 383*0fca6ea1SDimitry Andric // Not mandatory, but better optimized 384*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 385*0fca6ea1SDimitry Andric struct __any_of<__libdispatch_backend_tag, _ExecutionPolicy> 386*0fca6ea1SDimitry Andric : __cpu_parallel_any_of<__libdispatch_backend_tag, _ExecutionPolicy> {}; 387*0fca6ea1SDimitry Andric 388*0fca6ea1SDimitry Andric template <class _ExecutionPolicy> 389*0fca6ea1SDimitry Andric struct __fill<__libdispatch_backend_tag, _ExecutionPolicy> 390*0fca6ea1SDimitry Andric : __cpu_parallel_fill<__libdispatch_backend_tag, _ExecutionPolicy> {}; 391*0fca6ea1SDimitry Andric 392*0fca6ea1SDimitry Andric } // namespace __pstl 393*0fca6ea1SDimitry Andric _LIBCPP_END_NAMESPACE_STD 394*0fca6ea1SDimitry Andric 395*0fca6ea1SDimitry Andric _LIBCPP_POP_MACROS 396*0fca6ea1SDimitry Andric 397*0fca6ea1SDimitry Andric #endif // _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H 398