1 // -*- C++ -*- 2 //===-- numeric_impl.h ----------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _PSTL_NUMERIC_IMPL_H 11 #define _PSTL_NUMERIC_IMPL_H 12 13 #include <iterator> 14 #include <type_traits> 15 #include <numeric> 16 17 #include "parallel_backend.h" 18 #include "pstl_config.h" 19 #include "execution_impl.h" 20 #include "unseq_backend_simd.h" 21 #include "algorithm_fwd.h" 22 23 namespace __pstl 24 { 25 namespace __internal 26 { 27 28 //------------------------------------------------------------------------ 29 // transform_reduce (version with two binary functions, according to draft N4659) 30 //------------------------------------------------------------------------ 31 32 template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> 33 _Tp 34 __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, 35 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, 36 /*is_vector=*/std::false_type) noexcept 37 { 38 return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2); 39 } 40 41 template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> 42 _Tp 43 __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, 44 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, 45 /*is_vector=*/std::true_type) noexcept 46 { 47 typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; 48 return __unseq_backend::__simd_transform_reduce( 49 __last1 - __first1, __init, __binary_op1, 50 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); 51 } 52 53 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, 54 class _BinaryOperation2, class _IsVector> 55 _Tp 56 __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, 57 _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, 58 _BinaryOperation2 __binary_op2, _IsVector __is_vector, 59 /*is_parallel=*/std::false_type) noexcept 60 { 61 return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); 62 } 63 64 template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, 65 class _BinaryOperation1, class _BinaryOperation2, class _IsVector> 66 _Tp 67 __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, 68 _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, 69 _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type) 70 { 71 return __internal::__except_handler([&]() { 72 return __par_backend::__parallel_transform_reduce( 73 std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 74 [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { 75 return __binary_op2(*__i, *(__first2 + (__i - __first1))); 76 }, 77 __init, 78 __binary_op1, // Combine 79 [__first1, __first2, __binary_op1, __binary_op2, 80 __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { 81 return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, 82 __binary_op2, __is_vector); 83 }); 84 }); 85 } 86 87 //------------------------------------------------------------------------ 88 // transform_reduce (version with unary and binary functions) 89 //------------------------------------------------------------------------ 90 91 template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> 92 _Tp 93 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, 94 _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept 95 { 96 for (; __first != __last; ++__first) 97 { 98 __init = __binary_op(__init, __unary_op(*__first)); 99 } 100 return __init; 101 } 102 103 template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation> 104 _Tp 105 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, 106 _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept 107 { 108 typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; 109 return __unseq_backend::__simd_transform_reduce( 110 __last - __first, __init, __binary_op, 111 [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); 112 } 113 114 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, 115 class _IsVector> 116 _Tp 117 __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, 118 _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, 119 /*is_parallel=*/std::false_type) noexcept 120 { 121 return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); 122 } 123 124 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation, 125 class _IsVector> 126 _Tp 127 __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, 128 _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, 129 /*is_parallel=*/std::true_type) 130 { 131 return __internal::__except_handler([&]() { 132 return __par_backend::__parallel_transform_reduce( 133 std::forward<_ExecutionPolicy>(__exec), __first, __last, 134 [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, 135 [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) { 136 return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); 137 }); 138 }); 139 } 140 141 //------------------------------------------------------------------------ 142 // transform_exclusive_scan 143 // 144 // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) 145 //------------------------------------------------------------------------ 146 147 // Exclusive form 148 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> 149 std::pair<_OutputIterator, _Tp> 150 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 151 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 152 /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept 153 { 154 for (; __first != __last; ++__first, ++__result) 155 { 156 *__result = __init; 157 _PSTL_PRAGMA_FORCEINLINE 158 __init = __binary_op(__init, __unary_op(*__first)); 159 } 160 return std::make_pair(__result, __init); 161 } 162 163 // Inclusive form 164 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> 165 std::pair<_OutputIterator, _Tp> 166 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 167 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 168 /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept 169 { 170 for (; __first != __last; ++__first, ++__result) 171 { 172 _PSTL_PRAGMA_FORCEINLINE 173 __init = __binary_op(__init, __unary_op(*__first)); 174 *__result = __init; 175 } 176 return std::make_pair(__result, __init); 177 } 178 179 // type is arithmetic and binary operation is a user defined operation. 180 template <typename _Tp, typename _BinaryOperation> 181 using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value && 182 !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; 183 184 // [restriction] - T shall be DefaultConstructible. 185 // [violation] - default ctor of T shall set the identity value for binary_op. 186 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, 187 class _Inclusive> 188 typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type 189 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 190 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, 191 /*is_vector=*/std::true_type) noexcept 192 { 193 #if (_PSTL_UDS_PRESENT) 194 return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op, 195 _Inclusive()); 196 #else 197 // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value 198 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 199 /*is_vector=*/std::false_type()); 200 #endif 201 } 202 203 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, 204 class _Inclusive> 205 typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type 206 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 207 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, 208 /*is_vector=*/std::true_type) noexcept 209 { 210 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 211 /*is_vector=*/std::false_type()); 212 } 213 214 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, 215 class _BinaryOperation, class _Inclusive, class _IsVector> 216 _OutputIterator 217 __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 218 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 219 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept 220 { 221 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 222 __is_vector) 223 .first; 224 } 225 226 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, 227 class _BinaryOperation, class _Inclusive, class _IsVector> 228 typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type 229 __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, 230 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 231 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) 232 { 233 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 234 235 return __internal::__except_handler([&]() { 236 __par_backend::__parallel_transform_scan( 237 std::forward<_ExecutionPolicy>(__exec), __last - __first, 238 [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, 239 __binary_op, 240 [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { 241 // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. 242 return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, 243 __unary_op, 244 /*__is_vector*/ std::false_type()); 245 }, 246 [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, 247 _Tp __init) { 248 return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, 249 __init, __binary_op, _Inclusive(), __is_vector) 250 .second; 251 }); 252 return __result + (__last - __first); 253 }); 254 } 255 256 template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, 257 class _BinaryOperation, class _Inclusive, class _IsVector> 258 typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type 259 __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, 260 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 261 _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type) 262 { 263 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 264 _DifferenceType __n = __last - __first; 265 266 if (__n <= 0) 267 { 268 return __result; 269 } 270 return __internal::__except_handler([&]() { 271 __par_backend::__parallel_strict_scan( 272 std::forward<_ExecutionPolicy>(__exec), __n, __init, 273 [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { 274 return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, 275 __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) 276 .second; 277 }, 278 __binary_op, 279 [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) { 280 return *(std::transform(__result + __i, __result + __i + __len, __result + __i, 281 [&__initial, &__binary_op](const _Tp& __x) { 282 _PSTL_PRAGMA_FORCEINLINE 283 return __binary_op(__initial, __x); 284 }) - 285 1); 286 }, 287 [](_Tp __res) {}); 288 return __result + (__last - __first); 289 }); 290 } 291 292 //------------------------------------------------------------------------ 293 // adjacent_difference 294 //------------------------------------------------------------------------ 295 296 template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation> 297 _OutputIterator 298 __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first, 299 _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept 300 { 301 return std::adjacent_difference(__first, __last, __d_first, __op); 302 } 303 304 template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation> 305 _ForwardIterator2 306 __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, 307 BinaryOperation __op, /*is_vector=*/std::true_type) noexcept 308 { 309 _PSTL_ASSERT(__first != __last); 310 311 typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; 312 typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; 313 314 auto __n = __last - __first; 315 *__d_first = *__first; 316 return __unseq_backend::__simd_walk_3( 317 __first + 1, __n - 1, __first, __d_first + 1, 318 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); 319 } 320 321 template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation, 322 class _IsVector> 323 _OutputIterator 324 __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 325 _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, 326 /*is_parallel*/ std::false_type) noexcept 327 { 328 return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); 329 } 330 331 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation, 332 class _IsVector> 333 _ForwardIterator2 334 __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, 335 _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, 336 /*is_parallel=*/std::true_type) 337 { 338 _PSTL_ASSERT(__first != __last); 339 typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; 340 typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; 341 342 *__d_first = *__first; 343 __par_backend::__parallel_for( 344 std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, 345 [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) { 346 _ForwardIterator2 __d_b = __d_first + (__b - __first); 347 __internal::__brick_walk3( 348 __b, __e, __b + 1, __d_b + 1, 349 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, 350 __is_vector); 351 }); 352 return __d_first + (__last - __first); 353 } 354 355 } // namespace __internal 356 } // namespace __pstl 357 358 #endif /* _PSTL_NUMERIC_IMPL_H */ 359