xref: /llvm-project/pstl/include/pstl/internal/omp/util.h (revision 6069a6a5049497a32a50a49661c2f4169078bdba)
1 // -*- C++ -*-
2 // -*-===----------------------------------------------------------------------===//
3 //
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7 // See https://llvm.org/LICENSE.txt for license information.
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #ifndef _PSTL_INTERNAL_OMP_UTIL_H
12 #define _PSTL_INTERNAL_OMP_UTIL_H
13 
14 #include <algorithm>
15 #include <atomic>
16 #include <iterator>
17 #include <cstddef>
18 #include <cstdio>
19 #include <memory>
20 #include <vector>
21 #include <omp.h>
22 
23 #include "../parallel_backend_utils.h"
24 #include "../unseq_backend_simd.h"
25 #include "../utils.h"
26 
27 // Portability "#pragma" definition
28 #ifdef _MSC_VER
29 #    define _PSTL_PRAGMA(x) __pragma(x)
30 #else
31 #    define _PSTL_PRAGMA(x) _Pragma(#    x)
32 #endif
33 
34 _PSTL_HIDE_FROM_ABI_PUSH
35 
36 namespace __pstl
37 {
38 namespace __omp_backend
39 {
40 
41 //------------------------------------------------------------------------
42 // use to cancel execution
43 //------------------------------------------------------------------------
44 inline void
__cancel_execution()45 __cancel_execution()
46 {
47     // TODO: Figure out how to make cancelation work.
48 }
49 
50 //------------------------------------------------------------------------
51 // raw buffer
52 //------------------------------------------------------------------------
53 
54 template <typename _Tp>
55 class __buffer
56 {
57     std::allocator<_Tp> __allocator_;
58     _Tp* __ptr_;
59     const std::size_t __buf_size_;
60     __buffer(const __buffer&) = delete;
61     void
62     operator=(const __buffer&) = delete;
63 
64   public:
__buffer(std::size_t __n)65     __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}
66 
67     operator bool() const { return __ptr_ != nullptr; }
68 
69     _Tp*
get()70     get() const
71     {
72         return __ptr_;
73     }
~__buffer()74     ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
75 };
76 
77 // Preliminary size of each chunk: requires further discussion
78 inline constexpr std::size_t __default_chunk_size = 2048;
79 
80 // Convenience function to determine when we should run serial.
81 template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
82 constexpr auto
83 __should_run_serial(_Iterator __first, _Iterator __last) -> bool
84 {
85     using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
86     auto __size = std::distance(__first, __last);
87     return __size <= static_cast<_difference_type>(__default_chunk_size);
88 }
89 
90 template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
91 constexpr auto
92 __should_run_serial(_Index __first, _Index __last) -> bool
93 {
94     using _difference_type = _Index;
95     auto __size = __last - __first;
96     return __size <= static_cast<_difference_type>(__default_chunk_size);
97 }
98 
99 struct __chunk_metrics
100 {
101     std::size_t __n_chunks;
102     std::size_t __chunk_size;
103     std::size_t __first_chunk_size;
104 };
105 
106 // The iteration space partitioner according to __requested_chunk_size
107 template <class _RandomAccessIterator, class _Size = std::size_t>
108 auto
109 __chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
110                     _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
111 {
112     /*
113      * This algorithm improves distribution of elements in chunks by avoiding
114      * small tail chunks. The leftover elements that do not fit neatly into
115      * the chunk size are redistributed to early chunks. This improves
116      * utilization of the processor's prefetch and reduces the number of
117      * tasks needed by 1.
118      */
119 
120     const _Size __n = __last - __first;
121     _Size __n_chunks = 0;
122     _Size __chunk_size = 0;
123     _Size __first_chunk_size = 0;
124     if (__n < __requested_chunk_size)
125     {
126         __chunk_size = __n;
127         __first_chunk_size = __n;
128         __n_chunks = 1;
129         return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
130     }
131 
132     __n_chunks = (__n / __requested_chunk_size) + 1;
133     __chunk_size = __n / __n_chunks;
134     __first_chunk_size = __chunk_size;
135     const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);
136 
137     if (__n_leftover_items == __chunk_size)
138     {
139         __n_chunks += 1;
140         return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
141     }
142     else if (__n_leftover_items == 0)
143     {
144         __first_chunk_size = __chunk_size;
145         return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
146     }
147 
148     const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
149     const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);
150 
151     __chunk_size += __n_extra_items_per_chunk;
152     __first_chunk_size = __chunk_size + __n_final_leftover_items;
153 
154     return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
155 }
156 
157 template <typename _Iterator, typename _Index, typename _Func>
158 void
__process_chunk(const __chunk_metrics & __metrics,_Iterator __base,_Index __chunk_index,_Func __f)159 __process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
160 {
161     auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
162     auto __index = __chunk_index == 0 ? 0
163                                       : (__chunk_index * __metrics.__chunk_size) +
164                                             (__metrics.__first_chunk_size - __metrics.__chunk_size);
165     auto __first = __base + __index;
166     auto __last = __first + __this_chunk_size;
167     __f(__first, __last);
168 }
169 
170 } // namespace __omp_backend
171 } // namespace __pstl
172 
173 #endif // _PSTL_INTERNAL_OMP_UTIL_H
174