1*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 2*0fca6ea1SDimitry Andric // 3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0fca6ea1SDimitry Andric // 7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 8*0fca6ea1SDimitry Andric 9*0fca6ea1SDimitry Andric #ifndef _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H 10*0fca6ea1SDimitry Andric #define _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H 11*0fca6ea1SDimitry Andric 12*0fca6ea1SDimitry Andric #include <__algorithm/find_if.h> 13*0fca6ea1SDimitry Andric #include <__assert> 14*0fca6ea1SDimitry Andric #include <__atomic/atomic.h> 15*0fca6ea1SDimitry Andric #include <__config> 16*0fca6ea1SDimitry Andric #include <__functional/operations.h> 17*0fca6ea1SDimitry Andric #include <__iterator/concepts.h> 18*0fca6ea1SDimitry Andric #include <__iterator/iterator_traits.h> 19*0fca6ea1SDimitry Andric #include <__pstl/backend_fwd.h> 20*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/cpu_traits.h> 21*0fca6ea1SDimitry Andric #include <__type_traits/is_execution_policy.h> 22*0fca6ea1SDimitry Andric #include <__utility/move.h> 23*0fca6ea1SDimitry Andric #include <__utility/pair.h> 24*0fca6ea1SDimitry Andric #include <cstddef> 25*0fca6ea1SDimitry Andric #include <optional> 26*0fca6ea1SDimitry Andric 27*0fca6ea1SDimitry Andric #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 28*0fca6ea1SDimitry Andric # pragma GCC system_header 29*0fca6ea1SDimitry Andric #endif 30*0fca6ea1SDimitry Andric 31*0fca6ea1SDimitry Andric _LIBCPP_PUSH_MACROS 32*0fca6ea1SDimitry Andric #include <__undef_macros> 33*0fca6ea1SDimitry Andric 34*0fca6ea1SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD 35*0fca6ea1SDimitry Andric namespace __pstl { 36*0fca6ea1SDimitry Andric 37*0fca6ea1SDimitry Andric template <class _Backend, class _Index, class _Brick, class _Compare> 38*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI optional<_Index> 39*0fca6ea1SDimitry Andric __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) { 40*0fca6ea1SDimitry Andric typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; 41*0fca6ea1SDimitry Andric const _DifferenceType __n = __last - __first; 42*0fca6ea1SDimitry Andric _DifferenceType __initial_dist = __b_first ? __n : -1; 43*0fca6ea1SDimitry Andric std::atomic<_DifferenceType> __extremum(__initial_dist); 44*0fca6ea1SDimitry Andric // TODO: find out what is better here: parallel_for or parallel_reduce 45*0fca6ea1SDimitry Andric auto __res = 46*0fca6ea1SDimitry Andric __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { 47*0fca6ea1SDimitry Andric // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of 48*0fca6ea1SDimitry Andric // why using a shared variable scales fairly well in this situation. 49*0fca6ea1SDimitry Andric if (__comp(__i - __first, __extremum)) { 50*0fca6ea1SDimitry Andric _Index __result = __f(__i, __j); 51*0fca6ea1SDimitry Andric // If not '__last' returned then we found what we want so put this to extremum 52*0fca6ea1SDimitry Andric if (__result != __j) { 53*0fca6ea1SDimitry Andric const _DifferenceType __k = __result - __first; 54*0fca6ea1SDimitry Andric for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) { 55*0fca6ea1SDimitry Andric __extremum.compare_exchange_weak(__old, __k); 56*0fca6ea1SDimitry Andric } 57*0fca6ea1SDimitry Andric } 58*0fca6ea1SDimitry Andric } 59*0fca6ea1SDimitry Andric }); 60*0fca6ea1SDimitry Andric if (!__res) 61*0fca6ea1SDimitry Andric return nullopt; 62*0fca6ea1SDimitry Andric return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last; 63*0fca6ea1SDimitry Andric } 64*0fca6ea1SDimitry Andric 65*0fca6ea1SDimitry Andric template <class _Backend, class _Index, class _DifferenceType, class _Compare> 66*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI _Index 67*0fca6ea1SDimitry Andric __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept { 68*0fca6ea1SDimitry Andric // Experiments show good block sizes like this 69*0fca6ea1SDimitry Andric const _DifferenceType __block_size = 8; 70*0fca6ea1SDimitry Andric alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0}; 71*0fca6ea1SDimitry Andric while (__end - __begin >= __block_size) { 72*0fca6ea1SDimitry Andric _DifferenceType __found = 0; 73*0fca6ea1SDimitry Andric _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) { 74*0fca6ea1SDimitry Andric const _DifferenceType __t = __comp(__first, __i); 75*0fca6ea1SDimitry Andric __lane[__i - __begin] = __t; 76*0fca6ea1SDimitry Andric __found |= __t; 77*0fca6ea1SDimitry Andric } 78*0fca6ea1SDimitry Andric if (__found) { 79*0fca6ea1SDimitry Andric _DifferenceType __i; 80*0fca6ea1SDimitry Andric // This will vectorize 81*0fca6ea1SDimitry Andric for (__i = 0; __i < __block_size; ++__i) { 82*0fca6ea1SDimitry Andric if (__lane[__i]) { 83*0fca6ea1SDimitry Andric break; 84*0fca6ea1SDimitry Andric } 85*0fca6ea1SDimitry Andric } 86*0fca6ea1SDimitry Andric return __first + __begin + __i; 87*0fca6ea1SDimitry Andric } 88*0fca6ea1SDimitry Andric __begin += __block_size; 89*0fca6ea1SDimitry Andric } 90*0fca6ea1SDimitry Andric 91*0fca6ea1SDimitry Andric // Keep remainder scalar 92*0fca6ea1SDimitry Andric while (__begin != __end) { 93*0fca6ea1SDimitry Andric if (__comp(__first, __begin)) { 94*0fca6ea1SDimitry Andric return __first + __begin; 95*0fca6ea1SDimitry Andric } 96*0fca6ea1SDimitry Andric ++__begin; 97*0fca6ea1SDimitry Andric } 98*0fca6ea1SDimitry Andric return __first + __end; 99*0fca6ea1SDimitry Andric } 100*0fca6ea1SDimitry Andric 101*0fca6ea1SDimitry Andric template <class _Backend, class _RawExecutionPolicy> 102*0fca6ea1SDimitry Andric struct __cpu_parallel_find_if { 103*0fca6ea1SDimitry Andric template <class _Policy, class _ForwardIterator, class _Predicate> 104*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator> 105*0fca6ea1SDimitry Andric operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept { 106*0fca6ea1SDimitry Andric if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && 107*0fca6ea1SDimitry Andric __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { 108*0fca6ea1SDimitry Andric return __pstl::__parallel_find<_Backend>( 109*0fca6ea1SDimitry Andric __first, 110*0fca6ea1SDimitry Andric __last, 111*0fca6ea1SDimitry Andric [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { 112*0fca6ea1SDimitry Andric using _FindIfUnseq = __pstl::__find_if<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; 113*0fca6ea1SDimitry Andric auto __res = _FindIfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred); 114*0fca6ea1SDimitry Andric _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); 115*0fca6ea1SDimitry Andric return *std::move(__res); 116*0fca6ea1SDimitry Andric }, 117*0fca6ea1SDimitry Andric less<>{}, 118*0fca6ea1SDimitry Andric true); 119*0fca6ea1SDimitry Andric } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && 120*0fca6ea1SDimitry Andric __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { 121*0fca6ea1SDimitry Andric using __diff_t = __iter_diff_t<_ForwardIterator>; 122*0fca6ea1SDimitry Andric return __pstl::__simd_first<_Backend>( 123*0fca6ea1SDimitry Andric __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { 124*0fca6ea1SDimitry Andric return __pred(__iter[__i]); 125*0fca6ea1SDimitry Andric }); 126*0fca6ea1SDimitry Andric } else { 127*0fca6ea1SDimitry Andric return std::find_if(__first, __last, __pred); 128*0fca6ea1SDimitry Andric } 129*0fca6ea1SDimitry Andric } 130*0fca6ea1SDimitry Andric }; 131*0fca6ea1SDimitry Andric 132*0fca6ea1SDimitry Andric } // namespace __pstl 133*0fca6ea1SDimitry Andric _LIBCPP_END_NAMESPACE_STD 134*0fca6ea1SDimitry Andric 135*0fca6ea1SDimitry Andric _LIBCPP_POP_MACROS 136*0fca6ea1SDimitry Andric 137*0fca6ea1SDimitry Andric #endif // _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H 138