// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_UTIL_H
#define _PSTL_INTERNAL_OMP_UTIL_H

#include <algorithm>
#include <atomic>
#include <iterator>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <vector>
#include <omp.h>

#include "../parallel_backend_utils.h"
#include "../unseq_backend_simd.h"
#include "../utils.h"

// Portability "#pragma" definition
#ifdef _MSC_VER
#    define _PSTL_PRAGMA(x) __pragma(x)
#else
#    define _PSTL_PRAGMA(x) _Pragma(#x)
#endif
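
// A usage note (illustrative, not from the original header): because `#pragma`
// cannot appear inside a macro expansion, OpenMP directives in this backend are
// emitted through _PSTL_PRAGMA instead. For example:
//
//   _PSTL_PRAGMA(omp taskloop)   // expands to _Pragma("omp taskloop"),
//                                // or __pragma(omp taskloop) under MSVC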

_PSTL_HIDE_FROM_ABI_PUSH

namespace __pstl
{
namespace __omp_backend
{

//------------------------------------------------------------------------
// use to cancel execution
//------------------------------------------------------------------------
inline void
__cancel_execution()
{
    // TODO: Figure out how to make cancellation work.
}

//------------------------------------------------------------------------
// raw buffer
//------------------------------------------------------------------------

template <typename _Tp>
class __buffer
{
    std::allocator<_Tp> __allocator_;
    _Tp* __ptr_;
    const std::size_t __buf_size_;
    __buffer(const __buffer&) = delete;
    void
    operator=(const __buffer&) = delete;

  public:
    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}

    operator bool() const { return __ptr_ != nullptr; }

    _Tp*
    get() const
    {
        return __ptr_;
    }
    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
};
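
// A minimal usage sketch (hypothetical, not part of the original header). Note
// that __buffer hands out *uninitialized* storage from std::allocator, so the
// caller is responsible for constructing and destroying elements in it:
//
//   __pstl::__omp_backend::__buffer<int> __tmp(__n); // allocates __n ints
//   if (__tmp)
//   {
//       ::new (__tmp.get()) int(42); // placement-new constructs element 0
//       __tmp.get()[0] += 1;
//   }                                // storage is freed when __tmp is destroyed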

// Preliminary size of each chunk: requires further discussion
inline constexpr std::size_t __default_chunk_size = 2048;

// Convenience function to determine when we should run serial.
template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
constexpr auto
__should_run_serial(_Iterator __first, _Iterator __last) -> bool
{
    using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
    auto __size = std::distance(__first, __last);
    return __size <= static_cast<_difference_type>(__default_chunk_size);
}

template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
constexpr auto
__should_run_serial(_Index __first, _Index __last) -> bool
{
    using _difference_type = _Index;
    auto __size = __last - __first;
    return __size <= static_cast<_difference_type>(__default_chunk_size);
}
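
// Illustrative values (assumed inputs, not from the original header), with the
// default chunk size of 2048:
//
//   __should_run_serial(__v.begin(), __v.begin() + 1000);   // true:  1000 <= 2048
//   __should_run_serial(std::size_t(0), std::size_t(4096)); // false: 4096 >  2048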

struct __chunk_metrics
{
    std::size_t __n_chunks;
    std::size_t __chunk_size;
    std::size_t __first_chunk_size;
};

// The iteration space partitioner according to __requested_chunk_size
template <class _RandomAccessIterator, class _Size = std::size_t>
auto
__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
                    _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
{
    /*
     * This algorithm improves distribution of elements in chunks by avoiding
     * small tail chunks. The leftover elements that do not fit neatly into
     * the chunk size are redistributed to early chunks. This improves
     * utilization of the processor's prefetch and reduces the number of
     * tasks needed by 1.
     */

    const _Size __n = __last - __first;
    _Size __n_chunks = 0;
    _Size __chunk_size = 0;
    _Size __first_chunk_size = 0;
    if (__n < __requested_chunk_size)
    {
        __chunk_size = __n;
        __first_chunk_size = __n;
        __n_chunks = 1;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }

    __n_chunks = (__n / __requested_chunk_size) + 1;
    __chunk_size = __n / __n_chunks;
    __first_chunk_size = __chunk_size;
    const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);

    if (__n_leftover_items == __chunk_size)
    {
        __n_chunks += 1;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }
    else if (__n_leftover_items == 0)
    {
        __first_chunk_size = __chunk_size;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }

    const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
    const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);

    __chunk_size += __n_extra_items_per_chunk;
    __first_chunk_size = __chunk_size + __n_final_leftover_items;

    return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
}
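
// A worked example of the arithmetic above (illustrative, with assumed inputs):
// for __n = 10007 and __requested_chunk_size = 2048,
//
//   __n_chunks                = 10007 / 2048 + 1  = 5
//   __chunk_size              = 10007 / 5         = 2001
//   __n_leftover_items        = 10007 - 5 * 2001  = 2
//   __n_extra_items_per_chunk = 2 / 5             = 0
//   __n_final_leftover_items  = 2 - 0 * 5         = 2
//
// yielding metrics {5, 2001, 2003}: one chunk of 2003 items followed by four
// of 2001 (2003 + 4 * 2001 == 10007), so no small tail chunk is produced.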

template <typename _Iterator, typename _Index, typename _Func>
void
__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
{
    auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
    auto __index = __chunk_index == 0 ? 0
                                      : (__chunk_index * __metrics.__chunk_size) +
                                            (__metrics.__first_chunk_size - __metrics.__chunk_size);
    auto __first = __base + __index;
    auto __last = __first + __this_chunk_size;
    __f(__first, __last);
}
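
// A caller-side sketch (hypothetical, not part of the original header): the
// backend can partition a range once and then dispatch each chunk as a task,
// e.g. via an OpenMP taskloop, with the per-chunk work supplied as __f:
//
//   auto __metrics = __omp_backend::__chunk_partitioner(__first, __last);
//   _PSTL_PRAGMA(omp taskloop)
//   for (std::size_t __chunk = 0; __chunk < __metrics.__n_chunks; ++__chunk)
//   {
//       __omp_backend::__process_chunk(__metrics, __first, __chunk,
//                                      [](auto __b, auto __e) { /* work on [__b, __e) */ });
//   }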

} // namespace __omp_backend
} // namespace __pstl

_PSTL_HIDE_FROM_ABI_POP

#endif // _PSTL_INTERNAL_OMP_UTIL_H