1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef _LIBCPP___CXX03___PSTL_CPU_ALGOS_FIND_IF_H 10 #define _LIBCPP___CXX03___PSTL_CPU_ALGOS_FIND_IF_H 11 12 #include <__cxx03/__algorithm/find_if.h> 13 #include <__cxx03/__assert> 14 #include <__cxx03/__atomic/atomic.h> 15 #include <__cxx03/__config> 16 #include <__cxx03/__functional/operations.h> 17 #include <__cxx03/__iterator/concepts.h> 18 #include <__cxx03/__iterator/iterator_traits.h> 19 #include <__cxx03/__pstl/backend_fwd.h> 20 #include <__cxx03/__pstl/cpu_algos/cpu_traits.h> 21 #include <__cxx03/__type_traits/is_execution_policy.h> 22 #include <__cxx03/__utility/move.h> 23 #include <__cxx03/__utility/pair.h> 24 #include <__cxx03/cstddef> 25 #include <__cxx03/optional> 26 27 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 28 # pragma GCC system_header 29 #endif 30 31 _LIBCPP_PUSH_MACROS 32 #include <__cxx03/__undef_macros> 33 34 _LIBCPP_BEGIN_NAMESPACE_STD 35 namespace __pstl { 36 37 template <class _Backend, class _Index, class _Brick, class _Compare> 38 _LIBCPP_HIDE_FROM_ABI optional<_Index> 39 __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) { 40 typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; 41 const _DifferenceType __n = __last - __first; 42 _DifferenceType __initial_dist = __b_first ? __n : -1; 43 std::atomic<_DifferenceType> __extremum(__initial_dist); 44 // TODO: find out what is better here: parallel_for or parallel_reduce 45 auto __res = 46 __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { 47 // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of 48 // why using a shared variable scales fairly well in this situation. 49 if (__comp(__i - __first, __extremum)) { 50 _Index __result = __f(__i, __j); 51 // If not '__last' returned then we found what we want so put this to extremum 52 if (__result != __j) { 53 const _DifferenceType __k = __result - __first; 54 for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) { 55 __extremum.compare_exchange_weak(__old, __k); 56 } 57 } 58 } 59 }); 60 if (!__res) 61 return nullopt; 62 return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last; 63 } 64 65 template <class _Backend, class _Index, class _DifferenceType, class _Compare> 66 _LIBCPP_HIDE_FROM_ABI _Index 67 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept { 68 // Experiments show good block sizes like this 69 const _DifferenceType __block_size = 8; 70 alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0}; 71 while (__end - __begin >= __block_size) { 72 _DifferenceType __found = 0; 73 _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) { 74 const _DifferenceType __t = __comp(__first, __i); 75 __lane[__i - __begin] = __t; 76 __found |= __t; 77 } 78 if (__found) { 79 _DifferenceType __i; 80 // This will vectorize 81 for (__i = 0; __i < __block_size; ++__i) { 82 if (__lane[__i]) { 83 break; 84 } 85 } 86 return __first + __begin + __i; 87 } 88 __begin += __block_size; 89 } 90 91 // Keep remainder scalar 92 while (__begin != __end) { 93 if (__comp(__first, __begin)) { 94 return __first + __begin; 95 } 96 ++__begin; 97 } 98 return __first + __end; 99 } 100 101 template <class _Backend, class _RawExecutionPolicy> 102 struct __cpu_parallel_find_if { 103 template <class _Policy, class _ForwardIterator, class _Predicate> 104 _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator> 105 operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept { 106 if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && 107 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { 108 return __pstl::__parallel_find<_Backend>( 109 __first, 110 __last, 111 [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { 112 using _FindIfUnseq = __pstl::__find_if<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; 113 auto __res = _FindIfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred); 114 _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); 115 return *std::move(__res); 116 }, 117 less<>{}, 118 true); 119 } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && 120 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { 121 using __diff_t = __iter_diff_t<_ForwardIterator>; 122 return __pstl::__simd_first<_Backend>( 123 __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { 124 return __pred(__iter[__i]); 125 }); 126 } else { 127 return std::find_if(__first, __last, __pred); 128 } 129 } 130 }; 131 132 } // namespace __pstl 133 _LIBCPP_END_NAMESPACE_STD 134 135 _LIBCPP_POP_MACROS 136 137 #endif // _LIBCPP___CXX03___PSTL_CPU_ALGOS_FIND_IF_H 138