1 // -*- C++ -*-
2 //===-- numeric_impl.h ----------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef _PSTL_NUMERIC_IMPL_H
11 #define _PSTL_NUMERIC_IMPL_H
12
13 #include <iterator>
14 #include <type_traits>
15 #include <numeric>
16
17 #include "parallel_backend.h"
18 #include "pstl_config.h"
19 #include "execution_impl.h"
20 #include "unseq_backend_simd.h"
21 #include "algorithm_fwd.h"
22
23 namespace __pstl
24 {
25 namespace __internal
26 {
27
28 //------------------------------------------------------------------------
29 // transform_reduce (version with two binary functions, according to draft N4659)
30 //------------------------------------------------------------------------
31
// Serial brick of the two-sequence transform_reduce.
// Forwards to std::inner_product: __binary_op2 is applied to each pair of corresponding
// elements and the results are accumulated into __init with __binary_op1.
// The function is noexcept, so an exception escaping either functor terminates the program.
template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
_Tp
__brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
                         _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
                         std::false_type /*is_vector*/) noexcept
{
    return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2);
}
40
41 template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
42 _Tp
__brick_transform_reduce(_ForwardIterator1 __first1,_ForwardIterator1 __last1,_ForwardIterator2 __first2,_Tp __init,_BinaryOperation1 __binary_op1,_BinaryOperation2 __binary_op2,std::true_type)43 __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
44 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
45 /*is_vector=*/std::true_type) noexcept
46 {
47 typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
48 return __unseq_backend::__simd_transform_reduce(
49 __last1 - __first1, __init, __binary_op1,
50 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); });
51 }
52
// Non-parallel pattern of the two-sequence transform_reduce.
// The execution policy object is unused; the __is_vector tag alone decides whether the
// serial or the vectorized brick performs the work.
template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1,
          class _BinaryOperation2, class _IsVector>
_Tp
__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
                           _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
                           _BinaryOperation2 __binary_op2, _IsVector __is_vector,
                           std::false_type /*is_parallel*/) noexcept
{
    return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector);
}
63
64 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp,
65 class _BinaryOperation1, class _BinaryOperation2, class _IsVector>
66 _Tp
__pattern_transform_reduce(_ExecutionPolicy && __exec,_RandomAccessIterator1 __first1,_RandomAccessIterator1 __last1,_RandomAccessIterator2 __first2,_Tp __init,_BinaryOperation1 __binary_op1,_BinaryOperation2 __binary_op2,_IsVector __is_vector,std::true_type)67 __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
68 _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
69 _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type)
70 {
71 return __internal::__except_handler([&]() {
72 return __par_backend::__parallel_transform_reduce(
73 std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
74 [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable {
75 return __binary_op2(*__i, *(__first2 + (__i - __first1)));
76 },
77 __init,
78 __binary_op1, // Combine
79 [__first1, __first2, __binary_op1, __binary_op2,
80 __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp {
81 return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1,
82 __binary_op2, __is_vector);
83 });
84 });
85 }
86
87 //------------------------------------------------------------------------
88 // transform_reduce (version with unary and binary functions)
89 //------------------------------------------------------------------------
90
// Serial brick of the single-sequence transform_reduce.
// Forwards to the sequential std::transform_reduce: every element is passed through
// __unary_op and the results are folded into __init with __binary_op.
// The function is noexcept, so an exception escaping either functor terminates the program.
template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
_Tp
__brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
                         _UnaryOperation __unary_op, std::false_type /*is_vector*/) noexcept
{
    return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op);
}
98
99 template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>
100 _Tp
__brick_transform_reduce(_ForwardIterator __first,_ForwardIterator __last,_Tp __init,_BinaryOperation __binary_op,_UnaryOperation __unary_op,std::true_type)101 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
102 _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept
103 {
104 typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType;
105 return __unseq_backend::__simd_transform_reduce(
106 __last - __first, __init, __binary_op,
107 [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); });
108 }
109
110 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation,
111 class _IsVector>
112 _Tp
__pattern_transform_reduce(_ExecutionPolicy &&,_ForwardIterator __first,_ForwardIterator __last,_Tp __init,_BinaryOperation __binary_op,_UnaryOperation __unary_op,_IsVector __is_vector,std::false_type)113 __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
114 _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector,
115 /*is_parallel=*/std::false_type) noexcept
116 {
117 return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector);
118 }
119
120 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation,
121 class _IsVector>
122 _Tp
__pattern_transform_reduce(_ExecutionPolicy && __exec,_ForwardIterator __first,_ForwardIterator __last,_Tp __init,_BinaryOperation __binary_op,_UnaryOperation __unary_op,_IsVector __is_vector,std::true_type)123 __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
124 _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector,
125 /*is_parallel=*/std::true_type)
126 {
127 return __internal::__except_handler([&]() {
128 return __par_backend::__parallel_transform_reduce(
129 std::forward<_ExecutionPolicy>(__exec), __first, __last,
130 [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op,
131 [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) {
132 return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector);
133 });
134 });
135 }
136
137 //------------------------------------------------------------------------
// transform_scan (shared implementation of the exclusive and inclusive forms)
//
// Note: walk3, used by adjacent_difference further below, evaluates f(x,y,z) for (x,y,z)
// drawn from [first1,last1), [first2,...), [first3,...)
141 //------------------------------------------------------------------------
142
// Exclusive form (serial brick).
// Each output element receives the running value *before* the current input element is folded
// in, so the first output is __init itself.
// Returns the end of the written output range paired with the final running value.
template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
std::pair<_OutputIterator, _Tp>
__brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
                       std::false_type /*Inclusive*/, std::false_type /*is_vector*/) noexcept
{
    while (__first != __last)
    {
        // Store first, then accumulate: this is what makes the scan exclusive.
        *__result = __init;
        _PSTL_PRAGMA_FORCEINLINE
        __init = __binary_op(__init, __unary_op(*__first));
        ++__first;
        ++__result;
    }
    return std::make_pair(__result, __init);
}
158
// Inclusive form (serial brick).
// Each output element receives the running value *after* the current input element has been
// folded in.
// Returns the end of the written output range paired with the final running value.
template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
std::pair<_OutputIterator, _Tp>
__brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
                       std::true_type /*Inclusive*/, std::false_type /*is_vector*/) noexcept
{
    while (__first != __last)
    {
        // Accumulate first, then store: this is what makes the scan inclusive.
        _PSTL_PRAGMA_FORCEINLINE
        __init = __binary_op(__init, __unary_op(*__first));
        *__result = __init;
        ++__first;
        ++__result;
    }
    return std::make_pair(__result, __init);
}
174
// True when _Tp is an arithmetic type and _BinaryOperation is anything other than
// std::plus<_Tp>, i.e. an arithmetic type combined with a user-defined operation.
template <typename _Tp, typename _BinaryOperation>
using is_arithmetic_udop = std::integral_constant<bool, !std::is_same<_BinaryOperation, std::plus<_Tp>>::value &&
                                                            std::is_arithmetic<_Tp>::value>;
179
180 // [restriction] - T shall be DefaultConstructible.
181 // [violation] - default ctor of T shall set the identity value for binary_op.
182 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
183 class _Inclusive>
184 typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__brick_transform_scan(_ForwardIterator __first,_ForwardIterator __last,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,_Inclusive,std::true_type)185 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
186 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
187 /*is_vector=*/std::true_type) noexcept
188 {
189 #if (_PSTL_UDS_PRESENT)
190 return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op,
191 _Inclusive());
192 #else
193 // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value
194 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
195 /*is_vector=*/std::false_type());
196 #endif
197 }
198
199 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
200 class _Inclusive>
201 typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__brick_transform_scan(_ForwardIterator __first,_ForwardIterator __last,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,_Inclusive,std::true_type)202 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
203 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
204 /*is_vector=*/std::true_type) noexcept
205 {
206 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
207 /*is_vector=*/std::false_type());
208 }
209
210 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
211 class _BinaryOperation, class _Inclusive, class _IsVector>
212 _OutputIterator
__pattern_transform_scan(_ExecutionPolicy &&,_ForwardIterator __first,_ForwardIterator __last,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,_Inclusive,_IsVector __is_vector,std::false_type)213 __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
214 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
215 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
216 {
217 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
218 __is_vector)
219 .first;
220 }
221
222 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
223 class _BinaryOperation, class _Inclusive, class _IsVector>
224 typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type
__pattern_transform_scan(_ExecutionPolicy && __exec,_RandomAccessIterator __first,_RandomAccessIterator __last,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,_Inclusive,_IsVector __is_vector,std::true_type)225 __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
226 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
227 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type)
228 {
229 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
230
231 return __internal::__except_handler([&]() {
232 __par_backend::__parallel_transform_scan(
233 std::forward<_ExecutionPolicy>(__exec), __last - __first,
234 [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init,
235 __binary_op,
236 [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) {
237 // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan.
238 return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op,
239 __unary_op,
240 /*__is_vector*/ std::false_type());
241 },
242 [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j,
243 _Tp __init) {
244 return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op,
245 __init, __binary_op, _Inclusive(), __is_vector)
246 .second;
247 });
248 return __result + (__last - __first);
249 });
250 }
251
252 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
253 class _BinaryOperation, class _Inclusive, class _IsVector>
254 typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type
__pattern_transform_scan(_ExecutionPolicy && __exec,_RandomAccessIterator __first,_RandomAccessIterator __last,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,_Inclusive,_IsVector __is_vector,std::true_type)255 __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
256 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
257 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type)
258 {
259 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
260 _DifferenceType __n = __last - __first;
261
262 if (__n <= 0)
263 {
264 return __result;
265 }
266 return __internal::__except_handler([&]() {
267 __par_backend::__parallel_strict_scan(
268 std::forward<_ExecutionPolicy>(__exec), __n, __init,
269 [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) {
270 return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i,
271 __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector)
272 .second;
273 },
274 __binary_op,
275 [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) {
276 return *(std::transform(__result + __i, __result + __i + __len, __result + __i,
277 [&__initial, &__binary_op](const _Tp& __x) {
278 _PSTL_PRAGMA_FORCEINLINE
279 return __binary_op(__initial, __x);
280 }) -
281 1);
282 },
283 [](_Tp) {});
284 return __result + (__last - __first);
285 });
286 }
287
288 //------------------------------------------------------------------------
289 // adjacent_difference
290 //------------------------------------------------------------------------
291
// Serial brick of adjacent_difference: forwards to std::adjacent_difference and returns the
// iterator it yields (the end of the written output range).
// The function is noexcept, so an exception escaping __op terminates the program.
template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
_OutputIterator
__brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first,
                            _BinaryOperation __op, std::false_type /*is_vector*/) noexcept
{
    return std::adjacent_difference(__first, __last, __d_first, __op);
}
299
300 template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation>
301 _ForwardIterator2
__brick_adjacent_difference(_ForwardIterator1 __first,_ForwardIterator1 __last,_ForwardIterator2 __d_first,BinaryOperation __op,std::true_type)302 __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first,
303 BinaryOperation __op, /*is_vector=*/std::true_type) noexcept
304 {
305 _PSTL_ASSERT(__first != __last);
306
307 typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
308 typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
309
310 auto __n = __last - __first;
311 *__d_first = *__first;
312 return __unseq_backend::__simd_walk_3(
313 __first + 1, __n - 1, __first, __d_first + 1,
314 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); });
315 }
316
317 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation,
318 class _IsVector>
319 _OutputIterator
__pattern_adjacent_difference(_ExecutionPolicy &&,_ForwardIterator __first,_ForwardIterator __last,_OutputIterator __d_first,_BinaryOperation __op,_IsVector __is_vector,std::false_type)320 __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
321 _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector,
322 /*is_parallel*/ std::false_type) noexcept
323 {
324 return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector);
325 }
326
327 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation,
328 class _IsVector>
329 _ForwardIterator2
__pattern_adjacent_difference(_ExecutionPolicy && __exec,_ForwardIterator1 __first,_ForwardIterator1 __last,_ForwardIterator2 __d_first,_BinaryOperation __op,_IsVector __is_vector,std::true_type)330 __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
331 _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector,
332 /*is_parallel=*/std::true_type)
333 {
334 _PSTL_ASSERT(__first != __last);
335 typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
336 typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
337
338 *__d_first = *__first;
339 __par_backend::__parallel_for(
340 std::forward<_ExecutionPolicy>(__exec), __first, __last - 1,
341 [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) {
342 _ForwardIterator2 __d_b = __d_first + (__b - __first);
343 __internal::__brick_walk3(
344 __b, __e, __b + 1, __d_b + 1,
345 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); },
346 __is_vector);
347 });
348 return __d_first + (__last - __first);
349 }
350
351 } // namespace __internal
352 } // namespace __pstl
353
354 #endif /* _PSTL_NUMERIC_IMPL_H */
355