1 // -*- C++ -*-
2 // -*-===----------------------------------------------------------------------===//
3 //
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7 // See https://llvm.org/LICENSE.txt for license information.
8 //
9 //===----------------------------------------------------------------------===//
10
11 #ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
12 #define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
13
14 #include "util.h"
15
16 namespace __pstl
17 {
18 namespace __omp_backend
19 {
20
21 //------------------------------------------------------------------------
22 // parallel_transform_reduce
23 //
24 // Notation:
25 // r(i,j,init) returns reduction of init with reduction over [i,j)
26 // u(i) returns f(i,i+1,identity) for a hypothetical left identity element
27 // of r c(x,y) combines values x and y that were the result of r or u
28 //------------------------------------------------------------------------
29
30 template <class _RandomAccessIterator, class _UnaryOp, class _Value, class _Combiner, class _Reduction>
31 auto
__transform_reduce_body(_RandomAccessIterator __first,_RandomAccessIterator __last,_UnaryOp __unary_op,_Value __init,_Combiner __combiner,_Reduction __reduction)32 __transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init,
33 _Combiner __combiner, _Reduction __reduction)
34 {
35 const std::size_t __num_threads = omp_get_num_threads();
36 const std::size_t __size = __last - __first;
37
38 // Initial partition of the iteration space into chunks. If the range is too small,
39 // this will result in a nonsense policy, so we check on the size as well below.
40 auto __policy = __omp_backend::__chunk_partitioner(__first + __num_threads, __last);
41
42 if (__size <= __num_threads || __policy.__n_chunks < 2)
43 {
44 return __reduction(__first, __last, __init);
45 }
46
47 // Here, we cannot use OpenMP UDR because we must store the init value in
48 // the combiner and it will be used several times. Although there should be
49 // the only one; we manually generate the identity elements for each thread.
50 std::vector<_Value> __accums;
51 __accums.reserve(__num_threads);
52
53 // initialize accumulators for all threads
54 for (std::size_t __i = 0; __i < __num_threads; ++__i)
55 {
56 __accums.emplace_back(__unary_op(__first + __i));
57 }
58
59 // main loop
60 _PSTL_PRAGMA(omp taskloop shared(__accums))
61 for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
62 {
63 __pstl::__omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk,
64 [&](auto __chunk_first, auto __chunk_last)
65 {
66 auto __thread_num = omp_get_thread_num();
67 __accums[__thread_num] =
68 __reduction(__chunk_first, __chunk_last, __accums[__thread_num]);
69 });
70 }
71
72 // combine by accumulators
73 for (std::size_t __i = 0; __i < __num_threads; ++__i)
74 {
75 __init = __combiner(__init, __accums[__i]);
76 }
77
78 return __init;
79 }
80
81 template <class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryOp, class _Value, class _Combiner,
82 class _Reduction>
83 _Value
__parallel_transform_reduce(__pstl::__internal::__openmp_backend_tag,_ExecutionPolicy &&,_RandomAccessIterator __first,_RandomAccessIterator __last,_UnaryOp __unary_op,_Value __init,_Combiner __combiner,_Reduction __reduction)84 __parallel_transform_reduce(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first,
85 _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init, _Combiner __combiner,
86 _Reduction __reduction)
87 {
88 _Value __result = __init;
89 if (omp_in_parallel())
90 {
91 // We don't create a nested parallel region in an existing parallel
92 // region: just create tasks
93 __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner,
94 __reduction);
95 }
96 else
97 {
98 // Create a parallel region, and a single thread will create tasks
99 // for the region.
100 _PSTL_PRAGMA(omp parallel)
101 _PSTL_PRAGMA(omp single nowait)
102 {
103 __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner,
104 __reduction);
105 }
106 }
107
108 return __result;
109 }
110
111 } // namespace __omp_backend
112 } // namespace __pstl
113 #endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
114