1*06c3fb27SDimitry Andric /* 2*06c3fb27SDimitry Andric * kmp_collapse.h -- header for loop collapse feature 3*06c3fb27SDimitry Andric */ 4*06c3fb27SDimitry Andric 5*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 9*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10*06c3fb27SDimitry Andric // 11*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 12*06c3fb27SDimitry Andric 13*06c3fb27SDimitry Andric #ifndef KMP_COLLAPSE_H 14*06c3fb27SDimitry Andric #define KMP_COLLAPSE_H 15*06c3fb27SDimitry Andric 16*06c3fb27SDimitry Andric #include <type_traits> 17*06c3fb27SDimitry Andric 18*06c3fb27SDimitry Andric // Type of the index into the loop nest structures 19*06c3fb27SDimitry Andric // (with values from 0 to less than n from collapse(n)) 20*06c3fb27SDimitry Andric typedef kmp_int32 kmp_index_t; 21*06c3fb27SDimitry Andric 22*06c3fb27SDimitry Andric // Type for combined loop nest space IV: 23*06c3fb27SDimitry Andric typedef kmp_uint64 kmp_loop_nest_iv_t; 24*06c3fb27SDimitry Andric 25*06c3fb27SDimitry Andric // Loop has <, <=, etc. as a comparison: 26*06c3fb27SDimitry Andric enum comparison_t : kmp_int32 { 27*06c3fb27SDimitry Andric comp_less_or_eq = 0, 28*06c3fb27SDimitry Andric comp_greater_or_eq = 1, 29*06c3fb27SDimitry Andric comp_not_eq = 2, 30*06c3fb27SDimitry Andric comp_less = 3, 31*06c3fb27SDimitry Andric comp_greater = 4 32*06c3fb27SDimitry Andric }; 33*06c3fb27SDimitry Andric 34*06c3fb27SDimitry Andric // Type of loop IV. 35*06c3fb27SDimitry Andric // Type of bounds and step, after usual promotions 36*06c3fb27SDimitry Andric // are a subset of these types (32 & 64 only): 37*06c3fb27SDimitry Andric enum loop_type_t : kmp_int32 { 38*06c3fb27SDimitry Andric loop_type_uint8 = 0, 39*06c3fb27SDimitry Andric loop_type_int8 = 1, 40*06c3fb27SDimitry Andric loop_type_uint16 = 2, 41*06c3fb27SDimitry Andric loop_type_int16 = 3, 42*06c3fb27SDimitry Andric loop_type_uint32 = 4, 43*06c3fb27SDimitry Andric loop_type_int32 = 5, 44*06c3fb27SDimitry Andric loop_type_uint64 = 6, 45*06c3fb27SDimitry Andric loop_type_int64 = 7 46*06c3fb27SDimitry Andric }; 47*06c3fb27SDimitry Andric 48*06c3fb27SDimitry Andric /*! 49*06c3fb27SDimitry Andric @ingroup WORK_SHARING 50*06c3fb27SDimitry Andric * Describes the structure for rectangular nested loops. 51*06c3fb27SDimitry Andric */ 52*06c3fb27SDimitry Andric template <typename T> struct bounds_infoXX_template { 53*06c3fb27SDimitry Andric 54*06c3fb27SDimitry Andric // typedef typename traits_t<T>::unsigned_t UT; 55*06c3fb27SDimitry Andric typedef typename traits_t<T>::signed_t ST; 56*06c3fb27SDimitry Andric 57*06c3fb27SDimitry Andric loop_type_t loop_type; // The differentiator 58*06c3fb27SDimitry Andric loop_type_t loop_iv_type; 59*06c3fb27SDimitry Andric comparison_t comparison; 60*06c3fb27SDimitry Andric // outer_iv should be 0 (or any other less then number of dimentions) 61*06c3fb27SDimitry Andric // if loop doesn't depend on it (lb1 and ub1 will be 0). 62*06c3fb27SDimitry Andric // This way we can do multiplication without a check. 63*06c3fb27SDimitry Andric kmp_index_t outer_iv; 64*06c3fb27SDimitry Andric 65*06c3fb27SDimitry Andric // unions to keep the size constant: 66*06c3fb27SDimitry Andric union { 67*06c3fb27SDimitry Andric T lb0; 68*06c3fb27SDimitry Andric kmp_uint64 lb0_u64; // real type can be signed 69*06c3fb27SDimitry Andric }; 70*06c3fb27SDimitry Andric 71*06c3fb27SDimitry Andric union { 72*06c3fb27SDimitry Andric T lb1; 73*06c3fb27SDimitry Andric kmp_uint64 lb1_u64; // real type can be signed 74*06c3fb27SDimitry Andric }; 75*06c3fb27SDimitry Andric 76*06c3fb27SDimitry Andric union { 77*06c3fb27SDimitry Andric T ub0; 78*06c3fb27SDimitry Andric kmp_uint64 ub0_u64; // real type can be signed 79*06c3fb27SDimitry Andric }; 80*06c3fb27SDimitry Andric 81*06c3fb27SDimitry Andric union { 82*06c3fb27SDimitry Andric T ub1; 83*06c3fb27SDimitry Andric kmp_uint64 ub1_u64; // real type can be signed 84*06c3fb27SDimitry Andric }; 85*06c3fb27SDimitry Andric 86*06c3fb27SDimitry Andric union { 87*06c3fb27SDimitry Andric ST step; // signed even if bounds type is unsigned 88*06c3fb27SDimitry Andric kmp_int64 step_64; // signed 89*06c3fb27SDimitry Andric }; 90*06c3fb27SDimitry Andric 91*06c3fb27SDimitry Andric kmp_loop_nest_iv_t trip_count; 92*06c3fb27SDimitry Andric }; 93*06c3fb27SDimitry Andric 94*06c3fb27SDimitry Andric /*! 95*06c3fb27SDimitry Andric @ingroup WORK_SHARING 96*06c3fb27SDimitry Andric * Interface struct for rectangular nested loops. 97*06c3fb27SDimitry Andric * Same size as bounds_infoXX_template. 98*06c3fb27SDimitry Andric */ 99*06c3fb27SDimitry Andric struct bounds_info_t { 100*06c3fb27SDimitry Andric 101*06c3fb27SDimitry Andric loop_type_t loop_type; // The differentiator 102*06c3fb27SDimitry Andric loop_type_t loop_iv_type; 103*06c3fb27SDimitry Andric comparison_t comparison; 104*06c3fb27SDimitry Andric // outer_iv should be 0 (or any other less then number of dimentions) 105*06c3fb27SDimitry Andric // if loop doesn't depend on it (lb1 and ub1 will be 0). 106*06c3fb27SDimitry Andric // This way we can do multiplication without a check. 107*06c3fb27SDimitry Andric kmp_index_t outer_iv; 108*06c3fb27SDimitry Andric 109*06c3fb27SDimitry Andric kmp_uint64 lb0_u64; // real type can be signed 110*06c3fb27SDimitry Andric kmp_uint64 lb1_u64; // real type can be signed 111*06c3fb27SDimitry Andric kmp_uint64 ub0_u64; // real type can be signed 112*06c3fb27SDimitry Andric kmp_uint64 ub1_u64; // real type can be signed 113*06c3fb27SDimitry Andric kmp_int64 step_64; // signed 114*06c3fb27SDimitry Andric 115*06c3fb27SDimitry Andric // This is internal, but it's the only internal thing we need 116*06c3fb27SDimitry Andric // in rectangular case, so let's expose it here: 117*06c3fb27SDimitry Andric kmp_loop_nest_iv_t trip_count; 118*06c3fb27SDimitry Andric }; 119*06c3fb27SDimitry Andric 120*06c3fb27SDimitry Andric //------------------------------------------------------------------------- 121*06c3fb27SDimitry Andric // Additional types for internal representation: 122*06c3fb27SDimitry Andric 123*06c3fb27SDimitry Andric // Array for a point in the loop space, in the original space. 124*06c3fb27SDimitry Andric // It's represented in kmp_uint64, but each dimention is calculated in 125*06c3fb27SDimitry Andric // that loop IV type. Also dimentions have to be converted to those types 126*06c3fb27SDimitry Andric // when used in generated code. 127*06c3fb27SDimitry Andric typedef kmp_uint64* kmp_point_t; 128*06c3fb27SDimitry Andric 129*06c3fb27SDimitry Andric // Array: Number of loop iterations on each nesting level to achieve some point, 130*06c3fb27SDimitry Andric // in expanded space or in original space. 131*06c3fb27SDimitry Andric // OMPTODO: move from using iterations to using offsets (iterations multiplied 132*06c3fb27SDimitry Andric // by steps). For those we need to be careful with the types, as step can be 133*06c3fb27SDimitry Andric // negative, but it'll remove multiplications and divisions in several places. 134*06c3fb27SDimitry Andric typedef kmp_loop_nest_iv_t* kmp_iterations_t; 135*06c3fb27SDimitry Andric 136*06c3fb27SDimitry Andric // Internal struct with additional info: 137*06c3fb27SDimitry Andric template <typename T> struct bounds_info_internalXX_template { 138*06c3fb27SDimitry Andric 139*06c3fb27SDimitry Andric // OMPTODO: should span have type T or should it better be 140*06c3fb27SDimitry Andric // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than 141*06c3fb27SDimitry Andric // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to 142*06c3fb27SDimitry Andric // use big_span_t, if it can be resolved at compile time. 143*06c3fb27SDimitry Andric typedef 144*06c3fb27SDimitry Andric typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64> 145*06c3fb27SDimitry Andric big_span_t; 146*06c3fb27SDimitry Andric 147*06c3fb27SDimitry Andric // typedef typename big_span_t span_t; 148*06c3fb27SDimitry Andric typedef T span_t; 149*06c3fb27SDimitry Andric 150*06c3fb27SDimitry Andric bounds_infoXX_template<T> b; // possibly adjusted bounds 151*06c3fb27SDimitry Andric 152*06c3fb27SDimitry Andric // Leaving this as a union in case we'll switch to span_t with different sizes 153*06c3fb27SDimitry Andric // (depending on T) 154*06c3fb27SDimitry Andric union { 155*06c3fb27SDimitry Andric // Smallest possible value of iv (may be smaller than actually possible) 156*06c3fb27SDimitry Andric span_t span_smallest; 157*06c3fb27SDimitry Andric kmp_uint64 span_smallest_u64; 158*06c3fb27SDimitry Andric }; 159*06c3fb27SDimitry Andric 160*06c3fb27SDimitry Andric // Leaving this as a union in case we'll switch to span_t with different sizes 161*06c3fb27SDimitry Andric // (depending on T) 162*06c3fb27SDimitry Andric union { 163*06c3fb27SDimitry Andric // Biggest possible value of iv (may be bigger than actually possible) 164*06c3fb27SDimitry Andric span_t span_biggest; 165*06c3fb27SDimitry Andric kmp_uint64 span_biggest_u64; 166*06c3fb27SDimitry Andric }; 167*06c3fb27SDimitry Andric 168*06c3fb27SDimitry Andric // Did we adjust loop bounds (not counting canonicalization)? 169*06c3fb27SDimitry Andric bool loop_bounds_adjusted; 170*06c3fb27SDimitry Andric }; 171*06c3fb27SDimitry Andric 172*06c3fb27SDimitry Andric // Internal struct with additional info: 173*06c3fb27SDimitry Andric struct bounds_info_internal_t { 174*06c3fb27SDimitry Andric 175*06c3fb27SDimitry Andric bounds_info_t b; // possibly adjusted bounds 176*06c3fb27SDimitry Andric 177*06c3fb27SDimitry Andric // Smallest possible value of iv (may be smaller than actually possible) 178*06c3fb27SDimitry Andric kmp_uint64 span_smallest_u64; 179*06c3fb27SDimitry Andric 180*06c3fb27SDimitry Andric // Biggest possible value of iv (may be bigger than actually possible) 181*06c3fb27SDimitry Andric kmp_uint64 span_biggest_u64; 182*06c3fb27SDimitry Andric 183*06c3fb27SDimitry Andric // Did we adjust loop bounds (not counting canonicalization)? 184*06c3fb27SDimitry Andric bool loop_bounds_adjusted; 185*06c3fb27SDimitry Andric }; 186*06c3fb27SDimitry Andric 187*06c3fb27SDimitry Andric //----------APIs for rectangular loop nests-------------------------------- 188*06c3fb27SDimitry Andric 189*06c3fb27SDimitry Andric // Canonicalize loop nest and calculate overall trip count. 190*06c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread. 191*06c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form 192*06c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a 193*06c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array 194*06c3fb27SDimitry Andric // (only trip counts will be calculated). 195*06c3fb27SDimitry Andric // Returns trip count of overall space. 196*06c3fb27SDimitry Andric extern "C" kmp_loop_nest_iv_t 197*06c3fb27SDimitry Andric __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, 198*06c3fb27SDimitry Andric /*in/out*/ bounds_info_t *original_bounds_nest, 199*06c3fb27SDimitry Andric kmp_index_t n); 200*06c3fb27SDimitry Andric 201*06c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv. 202*06c3fb27SDimitry Andric // Note: original IV will be returned as if it had kmp_uint64 type, 203*06c3fb27SDimitry Andric // will have to be converted to original type in user code. 204*06c3fb27SDimitry Andric // Note: trip counts should be already calculated by 205*06c3fb27SDimitry Andric // __kmpc_process_loop_nest_rectang. 206*06c3fb27SDimitry Andric // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline 207*06c3fb27SDimitry Andric // that into user code. 208*06c3fb27SDimitry Andric extern "C" void 209*06c3fb27SDimitry Andric __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, 210*06c3fb27SDimitry Andric const bounds_info_t *original_bounds_nest, 211*06c3fb27SDimitry Andric /*out*/ kmp_uint64 *original_ivs, 212*06c3fb27SDimitry Andric kmp_index_t n); 213*06c3fb27SDimitry Andric 214*06c3fb27SDimitry Andric //----------Init API for non-rectangular loops-------------------------------- 215*06c3fb27SDimitry Andric 216*06c3fb27SDimitry Andric // Init API for collapsed loops (static, no chunks defined). 217*06c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread. 218*06c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form 219*06c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a 220*06c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array 221*06c3fb27SDimitry Andric // (only trip counts will be calculated). Internally API will expand the space 222*06c3fb27SDimitry Andric // to parallelogram/parallelepiped, calculate total, calculate bounds for the 223*06c3fb27SDimitry Andric // chunks in terms of the new IV, re-calc them in terms of old IVs (especially 224*06c3fb27SDimitry Andric // important on the left side, to hit the lower bounds and not step over), and 225*06c3fb27SDimitry Andric // pick the correct chunk for this thread (so it will calculate chunks up to the 226*06c3fb27SDimitry Andric // needed one). It could be optimized to calculate just this chunk, potentially 227*06c3fb27SDimitry Andric // a bit less well distributed among threads. It is designed to make sure that 228*06c3fb27SDimitry Andric // threads will receive predictable chunks, deterministically (so that next nest 229*06c3fb27SDimitry Andric // of loops with similar characteristics will get exactly same chunks on same 230*06c3fb27SDimitry Andric // threads). 231*06c3fb27SDimitry Andric // Current contract: chunk_bounds_nest has only lb0 and ub0, 232*06c3fb27SDimitry Andric // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). 233*06c3fb27SDimitry Andric extern "C" kmp_int32 234*06c3fb27SDimitry Andric __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, 235*06c3fb27SDimitry Andric /*in/out*/ bounds_info_t *original_bounds_nest, 236*06c3fb27SDimitry Andric /*out*/ bounds_info_t *chunk_bounds_nest, 237*06c3fb27SDimitry Andric kmp_index_t n, 238*06c3fb27SDimitry Andric /*out*/ kmp_int32 *plastiter); 239*06c3fb27SDimitry Andric 240*06c3fb27SDimitry Andric #endif // KMP_COLLAPSE_H 241