xref: /freebsd-src/contrib/llvm-project/libcxx/include/__pstl/cpu_algos/find_if.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
2*0fca6ea1SDimitry Andric //
3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0fca6ea1SDimitry Andric //
7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
8*0fca6ea1SDimitry Andric 
9*0fca6ea1SDimitry Andric #ifndef _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
10*0fca6ea1SDimitry Andric #define _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
11*0fca6ea1SDimitry Andric 
12*0fca6ea1SDimitry Andric #include <__algorithm/find_if.h>
13*0fca6ea1SDimitry Andric #include <__assert>
14*0fca6ea1SDimitry Andric #include <__atomic/atomic.h>
15*0fca6ea1SDimitry Andric #include <__config>
16*0fca6ea1SDimitry Andric #include <__functional/operations.h>
17*0fca6ea1SDimitry Andric #include <__iterator/concepts.h>
18*0fca6ea1SDimitry Andric #include <__iterator/iterator_traits.h>
19*0fca6ea1SDimitry Andric #include <__pstl/backend_fwd.h>
20*0fca6ea1SDimitry Andric #include <__pstl/cpu_algos/cpu_traits.h>
21*0fca6ea1SDimitry Andric #include <__type_traits/is_execution_policy.h>
22*0fca6ea1SDimitry Andric #include <__utility/move.h>
23*0fca6ea1SDimitry Andric #include <__utility/pair.h>
24*0fca6ea1SDimitry Andric #include <cstddef>
25*0fca6ea1SDimitry Andric #include <optional>
26*0fca6ea1SDimitry Andric 
27*0fca6ea1SDimitry Andric #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
28*0fca6ea1SDimitry Andric #  pragma GCC system_header
29*0fca6ea1SDimitry Andric #endif
30*0fca6ea1SDimitry Andric 
31*0fca6ea1SDimitry Andric _LIBCPP_PUSH_MACROS
32*0fca6ea1SDimitry Andric #include <__undef_macros>
33*0fca6ea1SDimitry Andric 
34*0fca6ea1SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD
35*0fca6ea1SDimitry Andric namespace __pstl {
36*0fca6ea1SDimitry Andric 
37*0fca6ea1SDimitry Andric template <class _Backend, class _Index, class _Brick, class _Compare>
38*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI optional<_Index>
39*0fca6ea1SDimitry Andric __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
40*0fca6ea1SDimitry Andric   typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
41*0fca6ea1SDimitry Andric   const _DifferenceType __n      = __last - __first;
42*0fca6ea1SDimitry Andric   _DifferenceType __initial_dist = __b_first ? __n : -1;
43*0fca6ea1SDimitry Andric   std::atomic<_DifferenceType> __extremum(__initial_dist);
44*0fca6ea1SDimitry Andric   // TODO: find out what is better here: parallel_for or parallel_reduce
45*0fca6ea1SDimitry Andric   auto __res =
46*0fca6ea1SDimitry Andric       __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
47*0fca6ea1SDimitry Andric         // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
48*0fca6ea1SDimitry Andric         // why using a shared variable scales fairly well in this situation.
49*0fca6ea1SDimitry Andric         if (__comp(__i - __first, __extremum)) {
50*0fca6ea1SDimitry Andric           _Index __result = __f(__i, __j);
51*0fca6ea1SDimitry Andric           // If not '__last' returned then we found what we want so put this to extremum
52*0fca6ea1SDimitry Andric           if (__result != __j) {
53*0fca6ea1SDimitry Andric             const _DifferenceType __k = __result - __first;
54*0fca6ea1SDimitry Andric             for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) {
55*0fca6ea1SDimitry Andric               __extremum.compare_exchange_weak(__old, __k);
56*0fca6ea1SDimitry Andric             }
57*0fca6ea1SDimitry Andric           }
58*0fca6ea1SDimitry Andric         }
59*0fca6ea1SDimitry Andric       });
60*0fca6ea1SDimitry Andric   if (!__res)
61*0fca6ea1SDimitry Andric     return nullopt;
62*0fca6ea1SDimitry Andric   return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last;
63*0fca6ea1SDimitry Andric }
64*0fca6ea1SDimitry Andric 
65*0fca6ea1SDimitry Andric template <class _Backend, class _Index, class _DifferenceType, class _Compare>
66*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI _Index
67*0fca6ea1SDimitry Andric __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
68*0fca6ea1SDimitry Andric   // Experiments show good block sizes like this
69*0fca6ea1SDimitry Andric   const _DifferenceType __block_size                                                = 8;
70*0fca6ea1SDimitry Andric   alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
71*0fca6ea1SDimitry Andric   while (__end - __begin >= __block_size) {
72*0fca6ea1SDimitry Andric     _DifferenceType __found = 0;
73*0fca6ea1SDimitry Andric     _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
74*0fca6ea1SDimitry Andric       const _DifferenceType __t = __comp(__first, __i);
75*0fca6ea1SDimitry Andric       __lane[__i - __begin]     = __t;
76*0fca6ea1SDimitry Andric       __found |= __t;
77*0fca6ea1SDimitry Andric     }
78*0fca6ea1SDimitry Andric     if (__found) {
79*0fca6ea1SDimitry Andric       _DifferenceType __i;
80*0fca6ea1SDimitry Andric       // This will vectorize
81*0fca6ea1SDimitry Andric       for (__i = 0; __i < __block_size; ++__i) {
82*0fca6ea1SDimitry Andric         if (__lane[__i]) {
83*0fca6ea1SDimitry Andric           break;
84*0fca6ea1SDimitry Andric         }
85*0fca6ea1SDimitry Andric       }
86*0fca6ea1SDimitry Andric       return __first + __begin + __i;
87*0fca6ea1SDimitry Andric     }
88*0fca6ea1SDimitry Andric     __begin += __block_size;
89*0fca6ea1SDimitry Andric   }
90*0fca6ea1SDimitry Andric 
91*0fca6ea1SDimitry Andric   // Keep remainder scalar
92*0fca6ea1SDimitry Andric   while (__begin != __end) {
93*0fca6ea1SDimitry Andric     if (__comp(__first, __begin)) {
94*0fca6ea1SDimitry Andric       return __first + __begin;
95*0fca6ea1SDimitry Andric     }
96*0fca6ea1SDimitry Andric     ++__begin;
97*0fca6ea1SDimitry Andric   }
98*0fca6ea1SDimitry Andric   return __first + __end;
99*0fca6ea1SDimitry Andric }
100*0fca6ea1SDimitry Andric 
101*0fca6ea1SDimitry Andric template <class _Backend, class _RawExecutionPolicy>
102*0fca6ea1SDimitry Andric struct __cpu_parallel_find_if {
103*0fca6ea1SDimitry Andric   template <class _Policy, class _ForwardIterator, class _Predicate>
104*0fca6ea1SDimitry Andric   _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
105*0fca6ea1SDimitry Andric   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
106*0fca6ea1SDimitry Andric     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
107*0fca6ea1SDimitry Andric                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
108*0fca6ea1SDimitry Andric       return __pstl::__parallel_find<_Backend>(
109*0fca6ea1SDimitry Andric           __first,
110*0fca6ea1SDimitry Andric           __last,
111*0fca6ea1SDimitry Andric           [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
112*0fca6ea1SDimitry Andric             using _FindIfUnseq = __pstl::__find_if<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
113*0fca6ea1SDimitry Andric             auto __res = _FindIfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
114*0fca6ea1SDimitry Andric             _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
115*0fca6ea1SDimitry Andric             return *std::move(__res);
116*0fca6ea1SDimitry Andric           },
117*0fca6ea1SDimitry Andric           less<>{},
118*0fca6ea1SDimitry Andric           true);
119*0fca6ea1SDimitry Andric     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
120*0fca6ea1SDimitry Andric                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
121*0fca6ea1SDimitry Andric       using __diff_t = __iter_diff_t<_ForwardIterator>;
122*0fca6ea1SDimitry Andric       return __pstl::__simd_first<_Backend>(
123*0fca6ea1SDimitry Andric           __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
124*0fca6ea1SDimitry Andric             return __pred(__iter[__i]);
125*0fca6ea1SDimitry Andric           });
126*0fca6ea1SDimitry Andric     } else {
127*0fca6ea1SDimitry Andric       return std::find_if(__first, __last, __pred);
128*0fca6ea1SDimitry Andric     }
129*0fca6ea1SDimitry Andric   }
130*0fca6ea1SDimitry Andric };
131*0fca6ea1SDimitry Andric 
132*0fca6ea1SDimitry Andric } // namespace __pstl
133*0fca6ea1SDimitry Andric _LIBCPP_END_NAMESPACE_STD
134*0fca6ea1SDimitry Andric 
135*0fca6ea1SDimitry Andric _LIBCPP_POP_MACROS
136*0fca6ea1SDimitry Andric 
137*0fca6ea1SDimitry Andric #endif // _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
138