1 // -*- C++ -*- 2 //===-- numeric_impl.h ----------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef __PSTL_numeric_impl_H 11 #define __PSTL_numeric_impl_H 12 13 #include <iterator> 14 #include <type_traits> 15 #include <numeric> 16 17 #include "execution_impl.h" 18 #include "unseq_backend_simd.h" 19 #include "algorithm_fwd.h" 20 21 #if __PSTL_USE_PAR_POLICIES 22 #include "parallel_backend.h" 23 #endif 24 25 namespace __pstl 26 { 27 namespace __internal 28 { 29 30 //------------------------------------------------------------------------ 31 // transform_reduce (version with two binary functions, according to draft N4659) 32 //------------------------------------------------------------------------ 33 34 template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> 35 _Tp 36 __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, 37 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, 38 /*is_vector=*/std::false_type) noexcept 39 { 40 return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2); 41 } 42 43 template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> 44 _Tp 45 __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, 46 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, 47 /*is_vector=*/std::true_type) noexcept 48 { 49 typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; 50 return __unseq_backend::__simd_transform_reduce( 51 __last1 - __first1, __init, __binary_op1, 52 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); 53 } 54 55 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, 56 class _BinaryOperation2, class _IsVector> 57 _Tp 58 __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, 59 _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, 60 _BinaryOperation2 __binary_op2, _IsVector __is_vector, 61 /*is_parallel=*/std::false_type) noexcept 62 { 63 return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); 64 } 65 66 #if __PSTL_USE_PAR_POLICIES 67 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, 68 class _BinaryOperation1, class _BinaryOperation2, class _IsVector> 69 _Tp 70 __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, 71 _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, 72 _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type) 73 { 74 return __internal::__except_handler([&]() { 75 return __par_backend::__parallel_transform_reduce( 76 std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 77 [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { 78 return __binary_op2(*__i, *(__first2 + (__i - __first1))); 79 }, 80 __init, 81 __binary_op1, // Combine 82 [__first1, __first2, __binary_op1, __binary_op2, 83 __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { 84 return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, 85 __binary_op2, __is_vector); 86 }); 87 }); 88 } 89 #endif 90 91 //------------------------------------------------------------------------ 92 // transform_reduce (version with unary and binary functions) 93 //------------------------------------------------------------------------ 94 95 template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> 96 _Tp 97 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, 98 _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept 99 { 100 for (; __first != __last; ++__first) 101 { 102 __init = __binary_op(__init, __unary_op(*__first)); 103 } 104 return __init; 105 } 106 107 template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation> 108 _Tp 109 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, 110 _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept 111 { 112 typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; 113 return __unseq_backend::__simd_transform_reduce( 114 __last - __first, __init, __binary_op, 115 [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); 116 } 117 118 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, 119 class _IsVector> 120 _Tp 121 __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, 122 _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, 123 /*is_parallel=*/std::false_type) noexcept 124 { 125 return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); 126 } 127 128 #if __PSTL_USE_PAR_POLICIES 129 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, 130 class _IsVector> 131 _Tp 132 __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, 133 _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, 134 /*is_parallel=*/std::true_type) 135 { 136 return __internal::__except_handler([&]() { 137 return __par_backend::__parallel_transform_reduce( 138 std::forward<_ExecutionPolicy>(__exec), __first, __last, 139 [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, 140 [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) { 141 return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); 142 }); 143 }); 144 } 145 #endif 146 147 //------------------------------------------------------------------------ 148 // transform_exclusive_scan 149 // 150 // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) 151 //------------------------------------------------------------------------ 152 153 // Exclusive form 154 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> 155 std::pair<_OutputIterator, _Tp> 156 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 157 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 158 /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept 159 { 160 for (; __first != __last; ++__first, ++__result) 161 { 162 *__result = __init; 163 __PSTL_PRAGMA_FORCEINLINE 164 __init = __binary_op(__init, __unary_op(*__first)); 165 } 166 return std::make_pair(__result, __init); 167 } 168 169 // Inclusive form 170 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> 171 std::pair<_OutputIterator, _Tp> 172 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 173 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 174 /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept 175 { 176 for (; __first != __last; ++__first, ++__result) 177 { 178 __PSTL_PRAGMA_FORCEINLINE 179 __init = __binary_op(__init, __unary_op(*__first)); 180 *__result = __init; 181 } 182 return std::make_pair(__result, __init); 183 } 184 185 // type is arithmetic and binary operation is a user defined operation. 186 template <typename _Tp, typename _BinaryOperation> 187 using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value && 188 !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; 189 190 // [restriction] - T shall be DefaultConstructible. 191 // [violation] - default ctor of T shall set the identity value for binary_op. 192 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, 193 class _Inclusive> 194 typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type 195 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 196 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, 197 /*is_vector=*/std::true_type) noexcept 198 { 199 #if (__PSTL_UDS_PRESENT) 200 return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op, 201 _Inclusive()); 202 #else 203 // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value 204 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 205 /*is_vector=*/std::false_type()); 206 #endif 207 } 208 209 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, 210 class _Inclusive> 211 typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type 212 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 213 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, 214 /*is_vector=*/std::true_type) noexcept 215 { 216 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 217 /*is_vector=*/std::false_type()); 218 } 219 220 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, 221 class _BinaryOperation, class _Inclusive, class _IsVector> 222 _OutputIterator 223 __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 224 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 225 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept 226 { 227 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), __is_vector) 228 .first; 229 } 230 231 #if __PSTL_USE_PAR_POLICIES 232 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, 233 class _BinaryOperation, class _Inclusive, class _IsVector> 234 typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type 235 __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, 236 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 237 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) 238 { 239 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 240 241 return __internal::__except_handler([&]() { 242 __par_backend::__parallel_transform_scan( 243 std::forward<_ExecutionPolicy>(__exec), __last - __first, 244 [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, 245 __binary_op, 246 [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { 247 // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. 248 return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, __unary_op, 249 /*__is_vector*/ std::false_type()); 250 }, 251 [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, 252 _Tp __init) { 253 return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, __init, 254 __binary_op, _Inclusive(), __is_vector) 255 .second; 256 }); 257 return __result + (__last - __first); 258 }); 259 } 260 #endif 261 262 #if __PSTL_USE_PAR_POLICIES 263 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, 264 class _BinaryOperation, class _Inclusive, class _IsVector> 265 typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type 266 __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, 267 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 268 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) 269 { 270 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 271 _DifferenceType __n = __last - __first; 272 273 if (__n <= 0) 274 { 275 return __result; 276 } 277 return __internal::__except_handler([&]() { 278 __par_backend::__parallel_strict_scan( 279 std::forward<_ExecutionPolicy>(__exec), __n, __init, 280 [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { 281 return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, __unary_op, _Tp{}, 282 __binary_op, _Inclusive(), __is_vector) 283 .second; 284 }, 285 __binary_op, 286 [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) { 287 return *(std::transform(__result + __i, __result + __i + __len, __result + __i, 288 [&__initial, &__binary_op](const _Tp& __x) { 289 __PSTL_PRAGMA_FORCEINLINE 290 return __binary_op(__initial, __x); 291 }) - 292 1); 293 }, 294 [](_Tp __res) {}); 295 return __result + (__last - __first); 296 }); 297 } 298 #endif 299 300 //------------------------------------------------------------------------ 301 // adjacent_difference 302 //------------------------------------------------------------------------ 303 304 template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation> 305 _OutputIterator 306 __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first, 307 _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept 308 { 309 return std::adjacent_difference(__first, __last, __d_first, __op); 310 } 311 312 template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation> 313 _ForwardIterator2 314 __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, 315 BinaryOperation __op, /*is_vector=*/std::true_type) noexcept 316 { 317 __PSTL_ASSERT(__first != __last); 318 319 typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; 320 typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; 321 322 auto __n = __last - __first; 323 *__d_first = *__first; 324 return __unseq_backend::__simd_walk_3( 325 __first + 1, __n - 1, __first, __d_first + 1, 326 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); 327 } 328 329 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation, 330 class _IsVector> 331 _OutputIterator 332 __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 333 _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, 334 /*is_parallel*/ std::false_type) noexcept 335 { 336 return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); 337 } 338 339 #if __PSTL_USE_PAR_POLICIES 340 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation, 341 class _IsVector> 342 _ForwardIterator2 343 __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, 344 _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, 345 /*is_parallel=*/std::true_type) 346 { 347 __PSTL_ASSERT(__first != __last); 348 typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; 349 typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; 350 351 *__d_first = *__first; 352 __par_backend::__parallel_for( 353 std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, 354 [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) { 355 _ForwardIterator2 __d_b = __d_first + (__b - __first); 356 __internal::__brick_walk3( 357 __b, __e, __b + 1, __d_b + 1, 358 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, 359 __is_vector); 360 }); 361 return __d_first + (__last - __first); 362 } 363 #endif 364 365 } // namespace __internal 366 } // namespace __pstl 367 368 #endif /* __PSTL_numeric_impl_H */ 369