xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_collapse.h (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric /*
2*06c3fb27SDimitry Andric  * kmp_collapse.h -- header for loop collapse feature
3*06c3fb27SDimitry Andric  */
4*06c3fb27SDimitry Andric 
5*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
9*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10*06c3fb27SDimitry Andric //
11*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
12*06c3fb27SDimitry Andric 
13*06c3fb27SDimitry Andric #ifndef KMP_COLLAPSE_H
14*06c3fb27SDimitry Andric #define KMP_COLLAPSE_H
15*06c3fb27SDimitry Andric 
16*06c3fb27SDimitry Andric #include <type_traits>
17*06c3fb27SDimitry Andric 
18*06c3fb27SDimitry Andric // Type of the index into the loop nest structures
19*06c3fb27SDimitry Andric // (with values from 0 to less than n from collapse(n))
20*06c3fb27SDimitry Andric typedef kmp_int32 kmp_index_t;
21*06c3fb27SDimitry Andric 
22*06c3fb27SDimitry Andric // Type for combined loop nest space IV:
23*06c3fb27SDimitry Andric typedef kmp_uint64 kmp_loop_nest_iv_t;
24*06c3fb27SDimitry Andric 
25*06c3fb27SDimitry Andric // Loop has <, <=, etc. as a comparison:
26*06c3fb27SDimitry Andric enum comparison_t : kmp_int32 {
27*06c3fb27SDimitry Andric   comp_less_or_eq = 0,
28*06c3fb27SDimitry Andric   comp_greater_or_eq = 1,
29*06c3fb27SDimitry Andric   comp_not_eq = 2,
30*06c3fb27SDimitry Andric   comp_less = 3,
31*06c3fb27SDimitry Andric   comp_greater = 4
32*06c3fb27SDimitry Andric };
33*06c3fb27SDimitry Andric 
34*06c3fb27SDimitry Andric // Type of loop IV.
35*06c3fb27SDimitry Andric // Type of bounds and step, after usual promotions
36*06c3fb27SDimitry Andric // are a subset of these types (32 & 64 only):
37*06c3fb27SDimitry Andric enum loop_type_t : kmp_int32 {
38*06c3fb27SDimitry Andric   loop_type_uint8 = 0,
39*06c3fb27SDimitry Andric   loop_type_int8 = 1,
40*06c3fb27SDimitry Andric   loop_type_uint16 = 2,
41*06c3fb27SDimitry Andric   loop_type_int16 = 3,
42*06c3fb27SDimitry Andric   loop_type_uint32 = 4,
43*06c3fb27SDimitry Andric   loop_type_int32 = 5,
44*06c3fb27SDimitry Andric   loop_type_uint64 = 6,
45*06c3fb27SDimitry Andric   loop_type_int64 = 7
46*06c3fb27SDimitry Andric };
47*06c3fb27SDimitry Andric 
48*06c3fb27SDimitry Andric /*!
49*06c3fb27SDimitry Andric  @ingroup WORK_SHARING
50*06c3fb27SDimitry Andric  * Describes the structure for rectangular nested loops.
51*06c3fb27SDimitry Andric  */
52*06c3fb27SDimitry Andric template <typename T> struct bounds_infoXX_template {
53*06c3fb27SDimitry Andric 
54*06c3fb27SDimitry Andric   // typedef typename traits_t<T>::unsigned_t UT;
55*06c3fb27SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
56*06c3fb27SDimitry Andric 
57*06c3fb27SDimitry Andric   loop_type_t loop_type; // The differentiator
58*06c3fb27SDimitry Andric   loop_type_t loop_iv_type;
59*06c3fb27SDimitry Andric   comparison_t comparison;
60*06c3fb27SDimitry Andric   // outer_iv should be 0 (or any other less then number of dimentions)
61*06c3fb27SDimitry Andric   // if loop doesn't depend on it (lb1 and ub1 will be 0).
62*06c3fb27SDimitry Andric   // This way we can do multiplication without a check.
63*06c3fb27SDimitry Andric   kmp_index_t outer_iv;
64*06c3fb27SDimitry Andric 
65*06c3fb27SDimitry Andric   // unions to keep the size constant:
66*06c3fb27SDimitry Andric   union {
67*06c3fb27SDimitry Andric     T lb0;
68*06c3fb27SDimitry Andric     kmp_uint64 lb0_u64; // real type can be signed
69*06c3fb27SDimitry Andric   };
70*06c3fb27SDimitry Andric 
71*06c3fb27SDimitry Andric   union {
72*06c3fb27SDimitry Andric     T lb1;
73*06c3fb27SDimitry Andric     kmp_uint64 lb1_u64; // real type can be signed
74*06c3fb27SDimitry Andric   };
75*06c3fb27SDimitry Andric 
76*06c3fb27SDimitry Andric   union {
77*06c3fb27SDimitry Andric     T ub0;
78*06c3fb27SDimitry Andric     kmp_uint64 ub0_u64; // real type can be signed
79*06c3fb27SDimitry Andric   };
80*06c3fb27SDimitry Andric 
81*06c3fb27SDimitry Andric   union {
82*06c3fb27SDimitry Andric     T ub1;
83*06c3fb27SDimitry Andric     kmp_uint64 ub1_u64; // real type can be signed
84*06c3fb27SDimitry Andric   };
85*06c3fb27SDimitry Andric 
86*06c3fb27SDimitry Andric   union {
87*06c3fb27SDimitry Andric     ST step; // signed even if bounds type is unsigned
88*06c3fb27SDimitry Andric     kmp_int64 step_64; // signed
89*06c3fb27SDimitry Andric   };
90*06c3fb27SDimitry Andric 
91*06c3fb27SDimitry Andric   kmp_loop_nest_iv_t trip_count;
92*06c3fb27SDimitry Andric };
93*06c3fb27SDimitry Andric 
94*06c3fb27SDimitry Andric /*!
95*06c3fb27SDimitry Andric  @ingroup WORK_SHARING
96*06c3fb27SDimitry Andric  * Interface struct for rectangular nested loops.
97*06c3fb27SDimitry Andric  * Same size as bounds_infoXX_template.
98*06c3fb27SDimitry Andric  */
99*06c3fb27SDimitry Andric struct bounds_info_t {
100*06c3fb27SDimitry Andric 
101*06c3fb27SDimitry Andric   loop_type_t loop_type; // The differentiator
102*06c3fb27SDimitry Andric   loop_type_t loop_iv_type;
103*06c3fb27SDimitry Andric   comparison_t comparison;
104*06c3fb27SDimitry Andric   // outer_iv should be 0  (or any other less then number of dimentions)
105*06c3fb27SDimitry Andric   // if loop doesn't depend on it (lb1 and ub1 will be 0).
106*06c3fb27SDimitry Andric   // This way we can do multiplication without a check.
107*06c3fb27SDimitry Andric   kmp_index_t outer_iv;
108*06c3fb27SDimitry Andric 
109*06c3fb27SDimitry Andric   kmp_uint64 lb0_u64; // real type can be signed
110*06c3fb27SDimitry Andric   kmp_uint64 lb1_u64; // real type can be signed
111*06c3fb27SDimitry Andric   kmp_uint64 ub0_u64; // real type can be signed
112*06c3fb27SDimitry Andric   kmp_uint64 ub1_u64; // real type can be signed
113*06c3fb27SDimitry Andric   kmp_int64 step_64; // signed
114*06c3fb27SDimitry Andric 
115*06c3fb27SDimitry Andric   // This is internal, but it's the only internal thing we need
116*06c3fb27SDimitry Andric   // in rectangular case, so let's expose it here:
117*06c3fb27SDimitry Andric   kmp_loop_nest_iv_t trip_count;
118*06c3fb27SDimitry Andric };
119*06c3fb27SDimitry Andric 
120*06c3fb27SDimitry Andric //-------------------------------------------------------------------------
121*06c3fb27SDimitry Andric // Additional types for internal representation:
122*06c3fb27SDimitry Andric 
123*06c3fb27SDimitry Andric // Array for a point in the loop space, in the original space.
124*06c3fb27SDimitry Andric // It's represented in kmp_uint64, but each dimention is calculated in
125*06c3fb27SDimitry Andric // that loop IV type. Also dimentions have to be converted to those types
126*06c3fb27SDimitry Andric // when used in generated code.
127*06c3fb27SDimitry Andric typedef kmp_uint64* kmp_point_t;
128*06c3fb27SDimitry Andric 
129*06c3fb27SDimitry Andric // Array: Number of loop iterations on each nesting level to achieve some point,
130*06c3fb27SDimitry Andric // in expanded space or in original space.
131*06c3fb27SDimitry Andric // OMPTODO: move from using iterations to using offsets (iterations multiplied
132*06c3fb27SDimitry Andric // by steps). For those we need to be careful with the types, as step can be
133*06c3fb27SDimitry Andric // negative, but it'll remove multiplications and divisions in several places.
134*06c3fb27SDimitry Andric typedef kmp_loop_nest_iv_t* kmp_iterations_t;
135*06c3fb27SDimitry Andric 
136*06c3fb27SDimitry Andric // Internal struct with additional info:
137*06c3fb27SDimitry Andric template <typename T> struct bounds_info_internalXX_template {
138*06c3fb27SDimitry Andric 
139*06c3fb27SDimitry Andric   // OMPTODO: should span have type T or should it better be
140*06c3fb27SDimitry Andric   // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
141*06c3fb27SDimitry Andric   // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
142*06c3fb27SDimitry Andric   // use big_span_t, if it can be resolved at compile time.
143*06c3fb27SDimitry Andric   typedef
144*06c3fb27SDimitry Andric       typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
145*06c3fb27SDimitry Andric           big_span_t;
146*06c3fb27SDimitry Andric 
147*06c3fb27SDimitry Andric   // typedef typename big_span_t span_t;
148*06c3fb27SDimitry Andric   typedef T span_t;
149*06c3fb27SDimitry Andric 
150*06c3fb27SDimitry Andric   bounds_infoXX_template<T> b; // possibly adjusted bounds
151*06c3fb27SDimitry Andric 
152*06c3fb27SDimitry Andric   // Leaving this as a union in case we'll switch to span_t with different sizes
153*06c3fb27SDimitry Andric   // (depending on T)
154*06c3fb27SDimitry Andric   union {
155*06c3fb27SDimitry Andric     // Smallest possible value of iv (may be smaller than actually possible)
156*06c3fb27SDimitry Andric     span_t span_smallest;
157*06c3fb27SDimitry Andric     kmp_uint64 span_smallest_u64;
158*06c3fb27SDimitry Andric   };
159*06c3fb27SDimitry Andric 
160*06c3fb27SDimitry Andric   // Leaving this as a union in case we'll switch to span_t with different sizes
161*06c3fb27SDimitry Andric   // (depending on T)
162*06c3fb27SDimitry Andric   union {
163*06c3fb27SDimitry Andric     // Biggest possible value of iv (may be bigger than actually possible)
164*06c3fb27SDimitry Andric     span_t span_biggest;
165*06c3fb27SDimitry Andric     kmp_uint64 span_biggest_u64;
166*06c3fb27SDimitry Andric   };
167*06c3fb27SDimitry Andric 
168*06c3fb27SDimitry Andric   // Did we adjust loop bounds (not counting canonicalization)?
169*06c3fb27SDimitry Andric   bool loop_bounds_adjusted;
170*06c3fb27SDimitry Andric };
171*06c3fb27SDimitry Andric 
172*06c3fb27SDimitry Andric // Internal struct with additional info:
173*06c3fb27SDimitry Andric struct bounds_info_internal_t {
174*06c3fb27SDimitry Andric 
175*06c3fb27SDimitry Andric   bounds_info_t b; // possibly adjusted bounds
176*06c3fb27SDimitry Andric 
177*06c3fb27SDimitry Andric   // Smallest possible value of iv (may be smaller than actually possible)
178*06c3fb27SDimitry Andric   kmp_uint64 span_smallest_u64;
179*06c3fb27SDimitry Andric 
180*06c3fb27SDimitry Andric   // Biggest possible value of iv (may be bigger than actually possible)
181*06c3fb27SDimitry Andric   kmp_uint64 span_biggest_u64;
182*06c3fb27SDimitry Andric 
183*06c3fb27SDimitry Andric   // Did we adjust loop bounds (not counting canonicalization)?
184*06c3fb27SDimitry Andric   bool loop_bounds_adjusted;
185*06c3fb27SDimitry Andric };
186*06c3fb27SDimitry Andric 
187*06c3fb27SDimitry Andric //----------APIs for rectangular loop nests--------------------------------
188*06c3fb27SDimitry Andric 
189*06c3fb27SDimitry Andric // Canonicalize loop nest and calculate overall trip count.
190*06c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread.
191*06c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form
192*06c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a
193*06c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array
194*06c3fb27SDimitry Andric // (only trip counts will be calculated).
195*06c3fb27SDimitry Andric // Returns trip count of overall space.
196*06c3fb27SDimitry Andric extern "C" kmp_loop_nest_iv_t
197*06c3fb27SDimitry Andric __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
198*06c3fb27SDimitry Andric                                  /*in/out*/ bounds_info_t *original_bounds_nest,
199*06c3fb27SDimitry Andric                                  kmp_index_t n);
200*06c3fb27SDimitry Andric 
201*06c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv.
202*06c3fb27SDimitry Andric // Note: original IV will be returned as if it had kmp_uint64 type,
203*06c3fb27SDimitry Andric // will have to be converted to original type in user code.
204*06c3fb27SDimitry Andric // Note: trip counts should be already calculated by
205*06c3fb27SDimitry Andric // __kmpc_process_loop_nest_rectang.
206*06c3fb27SDimitry Andric // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
207*06c3fb27SDimitry Andric // that into user code.
208*06c3fb27SDimitry Andric extern "C" void
209*06c3fb27SDimitry Andric __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
210*06c3fb27SDimitry Andric                                  const bounds_info_t *original_bounds_nest,
211*06c3fb27SDimitry Andric                                  /*out*/ kmp_uint64 *original_ivs,
212*06c3fb27SDimitry Andric                                  kmp_index_t n);
213*06c3fb27SDimitry Andric 
214*06c3fb27SDimitry Andric //----------Init API for non-rectangular loops--------------------------------
215*06c3fb27SDimitry Andric 
216*06c3fb27SDimitry Andric // Init API for collapsed loops (static, no chunks defined).
217*06c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread.
218*06c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form
219*06c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a
220*06c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array
221*06c3fb27SDimitry Andric // (only trip counts will be calculated). Internally API will expand the space
222*06c3fb27SDimitry Andric // to parallelogram/parallelepiped, calculate total, calculate bounds for the
223*06c3fb27SDimitry Andric // chunks in terms of the new IV, re-calc them in terms of old IVs (especially
224*06c3fb27SDimitry Andric // important on the left side, to hit the lower bounds and not step over), and
225*06c3fb27SDimitry Andric // pick the correct chunk for this thread (so it will calculate chunks up to the
226*06c3fb27SDimitry Andric // needed one). It could be optimized to calculate just this chunk, potentially
227*06c3fb27SDimitry Andric // a bit less well distributed among threads. It is designed to make sure that
228*06c3fb27SDimitry Andric // threads will receive predictable chunks, deterministically (so that next nest
229*06c3fb27SDimitry Andric // of loops with similar characteristics will get exactly same chunks on same
230*06c3fb27SDimitry Andric // threads).
231*06c3fb27SDimitry Andric // Current contract: chunk_bounds_nest has only lb0 and ub0,
232*06c3fb27SDimitry Andric // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
233*06c3fb27SDimitry Andric extern "C" kmp_int32
234*06c3fb27SDimitry Andric __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
235*06c3fb27SDimitry Andric                           /*in/out*/ bounds_info_t *original_bounds_nest,
236*06c3fb27SDimitry Andric                           /*out*/ bounds_info_t *chunk_bounds_nest,
237*06c3fb27SDimitry Andric                           kmp_index_t n,
238*06c3fb27SDimitry Andric                           /*out*/ kmp_int32 *plastiter);
239*06c3fb27SDimitry Andric 
240*06c3fb27SDimitry Andric #endif // KMP_COLLAPSE_H
241