1*06c3fb27SDimitry Andric /* 2*06c3fb27SDimitry Andric * kmp_collapse.cpp -- loop collapse feature 3*06c3fb27SDimitry Andric */ 4*06c3fb27SDimitry Andric 5*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 9*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10*06c3fb27SDimitry Andric // 11*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 12*06c3fb27SDimitry Andric 13*06c3fb27SDimitry Andric #include "kmp.h" 14*06c3fb27SDimitry Andric #include "kmp_error.h" 15*06c3fb27SDimitry Andric #include "kmp_i18n.h" 16*06c3fb27SDimitry Andric #include "kmp_itt.h" 17*06c3fb27SDimitry Andric #include "kmp_stats.h" 18*06c3fb27SDimitry Andric #include "kmp_str.h" 19*06c3fb27SDimitry Andric #include "kmp_collapse.h" 20*06c3fb27SDimitry Andric 21*06c3fb27SDimitry Andric #if OMPT_SUPPORT 22*06c3fb27SDimitry Andric #include "ompt-specific.h" 23*06c3fb27SDimitry Andric #endif 24*06c3fb27SDimitry Andric 25*06c3fb27SDimitry Andric // OMPTODO: different style of comments (see kmp_sched) 26*06c3fb27SDimitry Andric // OMPTODO: OMPT/OMPD 27*06c3fb27SDimitry Andric 28*06c3fb27SDimitry Andric // avoid inadevertently using a library based abs 29*06c3fb27SDimitry Andric template <typename T> T __kmp_abs(const T val) { 30*06c3fb27SDimitry Andric return (val < 0) ? 
-val: val; 31*06c3fb27SDimitry Andric } 32*06c3fb27SDimitry Andric kmp_uint32 __kmp_abs(const kmp_uint32 val) { return val; } 33*06c3fb27SDimitry Andric kmp_uint64 __kmp_abs(const kmp_uint64 val) { return val; } 34*06c3fb27SDimitry Andric 35*06c3fb27SDimitry Andric //---------------------------------------------------------------------------- 36*06c3fb27SDimitry Andric // Common functions for working with rectangular and non-rectangular loops 37*06c3fb27SDimitry Andric //---------------------------------------------------------------------------- 38*06c3fb27SDimitry Andric 39*06c3fb27SDimitry Andric template <typename T> int __kmp_sign(T val) { return (T(0) < val) - (val < T(0)); } 40*06c3fb27SDimitry Andric 41*06c3fb27SDimitry Andric //----------Loop canonicalization--------------------------------------------- 42*06c3fb27SDimitry Andric 43*06c3fb27SDimitry Andric // For loop nest (any shape): 44*06c3fb27SDimitry Andric // convert != to < or >; 45*06c3fb27SDimitry Andric // switch from using < or > to <= or >=. 46*06c3fb27SDimitry Andric // "bounds" array has to be allocated per thread. 47*06c3fb27SDimitry Andric // All other internal functions will work only with canonicalized loops. 
48*06c3fb27SDimitry Andric template <typename T> 49*06c3fb27SDimitry Andric void kmp_canonicalize_one_loop_XX( 50*06c3fb27SDimitry Andric ident_t *loc, 51*06c3fb27SDimitry Andric /*in/out*/ bounds_infoXX_template<T> *bounds) { 52*06c3fb27SDimitry Andric 53*06c3fb27SDimitry Andric if (__kmp_env_consistency_check) { 54*06c3fb27SDimitry Andric if (bounds->step == 0) { 55*06c3fb27SDimitry Andric __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, 56*06c3fb27SDimitry Andric loc); 57*06c3fb27SDimitry Andric } 58*06c3fb27SDimitry Andric } 59*06c3fb27SDimitry Andric 60*06c3fb27SDimitry Andric if (bounds->comparison == comparison_t::comp_not_eq) { 61*06c3fb27SDimitry Andric // We can convert this to < or >, depends on the sign of the step: 62*06c3fb27SDimitry Andric if (bounds->step > 0) { 63*06c3fb27SDimitry Andric bounds->comparison = comparison_t::comp_less; 64*06c3fb27SDimitry Andric } else { 65*06c3fb27SDimitry Andric bounds->comparison = comparison_t::comp_greater; 66*06c3fb27SDimitry Andric } 67*06c3fb27SDimitry Andric } 68*06c3fb27SDimitry Andric 69*06c3fb27SDimitry Andric if (bounds->comparison == comparison_t::comp_less) { 70*06c3fb27SDimitry Andric // Note: ub0 can be unsigned. Should be Ok to hit overflow here, 71*06c3fb27SDimitry Andric // because ub0 + ub1*j should be still positive (otherwise loop was not 72*06c3fb27SDimitry Andric // well formed) 73*06c3fb27SDimitry Andric bounds->ub0 -= 1; 74*06c3fb27SDimitry Andric bounds->comparison = comparison_t::comp_less_or_eq; 75*06c3fb27SDimitry Andric } else if (bounds->comparison == comparison_t::comp_greater) { 76*06c3fb27SDimitry Andric bounds->ub0 += 1; 77*06c3fb27SDimitry Andric bounds->comparison = comparison_t::comp_greater_or_eq; 78*06c3fb27SDimitry Andric } 79*06c3fb27SDimitry Andric } 80*06c3fb27SDimitry Andric 81*06c3fb27SDimitry Andric // Canonicalize loop nest. original_bounds_nest is an array of length n. 
82*06c3fb27SDimitry Andric void kmp_canonicalize_loop_nest(ident_t *loc, 83*06c3fb27SDimitry Andric /*in/out*/ bounds_info_t *original_bounds_nest, 84*06c3fb27SDimitry Andric kmp_index_t n) { 85*06c3fb27SDimitry Andric 86*06c3fb27SDimitry Andric for (kmp_index_t ind = 0; ind < n; ++ind) { 87*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 88*06c3fb27SDimitry Andric 89*06c3fb27SDimitry Andric switch (bounds->loop_type) { 90*06c3fb27SDimitry Andric case loop_type_t::loop_type_int32: 91*06c3fb27SDimitry Andric kmp_canonicalize_one_loop_XX<kmp_int32>( 92*06c3fb27SDimitry Andric loc, 93*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_int32> *)(bounds)); 94*06c3fb27SDimitry Andric break; 95*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 96*06c3fb27SDimitry Andric kmp_canonicalize_one_loop_XX<kmp_uint32>( 97*06c3fb27SDimitry Andric loc, 98*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_uint32> *)(bounds)); 99*06c3fb27SDimitry Andric break; 100*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 101*06c3fb27SDimitry Andric kmp_canonicalize_one_loop_XX<kmp_int64>( 102*06c3fb27SDimitry Andric loc, 103*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_int64> *)(bounds)); 104*06c3fb27SDimitry Andric break; 105*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 106*06c3fb27SDimitry Andric kmp_canonicalize_one_loop_XX<kmp_uint64>( 107*06c3fb27SDimitry Andric loc, 108*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_uint64> *)(bounds)); 109*06c3fb27SDimitry Andric break; 110*06c3fb27SDimitry Andric default: 111*06c3fb27SDimitry Andric KMP_ASSERT(false); 112*06c3fb27SDimitry Andric } 113*06c3fb27SDimitry Andric } 114*06c3fb27SDimitry Andric } 115*06c3fb27SDimitry Andric 116*06c3fb27SDimitry Andric //----------Calculating trip count on one level------------------------------- 117*06c3fb27SDimitry Andric 118*06c3fb27SDimitry Andric // Calculate trip count on this loop level. 
119*06c3fb27SDimitry Andric // We do this either for a rectangular loop nest, 120*06c3fb27SDimitry Andric // or after an adjustment bringing the loops to a parallelepiped shape. 121*06c3fb27SDimitry Andric // This number should not depend on the value of outer IV 122*06c3fb27SDimitry Andric // even if the formular has lb1 and ub1. 123*06c3fb27SDimitry Andric // Note: for non-rectangular loops don't use span for this, it's too big. 124*06c3fb27SDimitry Andric 125*06c3fb27SDimitry Andric template <typename T> 126*06c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_calculate_trip_count_XX( 127*06c3fb27SDimitry Andric /*in/out*/ bounds_infoXX_template<T> *bounds) { 128*06c3fb27SDimitry Andric 129*06c3fb27SDimitry Andric if (bounds->comparison == comparison_t::comp_less_or_eq) { 130*06c3fb27SDimitry Andric if (bounds->ub0 < bounds->lb0) { 131*06c3fb27SDimitry Andric // Note: after this we don't need to calculate inner loops, 132*06c3fb27SDimitry Andric // but that should be an edge case: 133*06c3fb27SDimitry Andric bounds->trip_count = 0; 134*06c3fb27SDimitry Andric } else { 135*06c3fb27SDimitry Andric // ub - lb may exceed signed type range; we need to cast to 136*06c3fb27SDimitry Andric // kmp_loop_nest_iv_t anyway 137*06c3fb27SDimitry Andric bounds->trip_count = 138*06c3fb27SDimitry Andric static_cast<kmp_loop_nest_iv_t>(bounds->ub0 - bounds->lb0) / 139*06c3fb27SDimitry Andric __kmp_abs(bounds->step) + 140*06c3fb27SDimitry Andric 1; 141*06c3fb27SDimitry Andric } 142*06c3fb27SDimitry Andric } else if (bounds->comparison == comparison_t::comp_greater_or_eq) { 143*06c3fb27SDimitry Andric if (bounds->lb0 < bounds->ub0) { 144*06c3fb27SDimitry Andric // Note: after this we don't need to calculate inner loops, 145*06c3fb27SDimitry Andric // but that should be an edge case: 146*06c3fb27SDimitry Andric bounds->trip_count = 0; 147*06c3fb27SDimitry Andric } else { 148*06c3fb27SDimitry Andric // lb - ub may exceed signed type range; we need to cast to 149*06c3fb27SDimitry Andric // 
kmp_loop_nest_iv_t anyway 150*06c3fb27SDimitry Andric bounds->trip_count = 151*06c3fb27SDimitry Andric static_cast<kmp_loop_nest_iv_t>(bounds->lb0 - bounds->ub0) / 152*06c3fb27SDimitry Andric __kmp_abs(bounds->step) + 153*06c3fb27SDimitry Andric 1; 154*06c3fb27SDimitry Andric } 155*06c3fb27SDimitry Andric } else { 156*06c3fb27SDimitry Andric KMP_ASSERT(false); 157*06c3fb27SDimitry Andric } 158*06c3fb27SDimitry Andric return bounds->trip_count; 159*06c3fb27SDimitry Andric } 160*06c3fb27SDimitry Andric 161*06c3fb27SDimitry Andric // Calculate trip count on this loop level. 162*06c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_calculate_trip_count(/*in/out*/ bounds_info_t *bounds) { 163*06c3fb27SDimitry Andric 164*06c3fb27SDimitry Andric kmp_loop_nest_iv_t trip_count = 0; 165*06c3fb27SDimitry Andric 166*06c3fb27SDimitry Andric switch (bounds->loop_type) { 167*06c3fb27SDimitry Andric case loop_type_t::loop_type_int32: 168*06c3fb27SDimitry Andric trip_count = kmp_calculate_trip_count_XX<kmp_int32>( 169*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_int32> *)(bounds)); 170*06c3fb27SDimitry Andric break; 171*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 172*06c3fb27SDimitry Andric trip_count = kmp_calculate_trip_count_XX<kmp_uint32>( 173*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_uint32> *)(bounds)); 174*06c3fb27SDimitry Andric break; 175*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 176*06c3fb27SDimitry Andric trip_count = kmp_calculate_trip_count_XX<kmp_int64>( 177*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_int64> *)(bounds)); 178*06c3fb27SDimitry Andric break; 179*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 180*06c3fb27SDimitry Andric trip_count = kmp_calculate_trip_count_XX<kmp_uint64>( 181*06c3fb27SDimitry Andric /*in/out*/ (bounds_infoXX_template<kmp_uint64> *)(bounds)); 182*06c3fb27SDimitry Andric break; 183*06c3fb27SDimitry Andric default: 184*06c3fb27SDimitry Andric 
KMP_ASSERT(false); 185*06c3fb27SDimitry Andric } 186*06c3fb27SDimitry Andric 187*06c3fb27SDimitry Andric return trip_count; 188*06c3fb27SDimitry Andric } 189*06c3fb27SDimitry Andric 190*06c3fb27SDimitry Andric //----------Trim original iv according to its type---------------------------- 191*06c3fb27SDimitry Andric 192*06c3fb27SDimitry Andric // Trim original iv according to its type. 193*06c3fb27SDimitry Andric // Return kmp_uint64 value which can be easily used in all internal calculations 194*06c3fb27SDimitry Andric // And can be statically cast back to original type in user code. 195*06c3fb27SDimitry Andric kmp_uint64 kmp_fix_iv(loop_type_t loop_iv_type, kmp_uint64 original_iv) { 196*06c3fb27SDimitry Andric kmp_uint64 res = 0; 197*06c3fb27SDimitry Andric 198*06c3fb27SDimitry Andric switch (loop_iv_type) { 199*06c3fb27SDimitry Andric case loop_type_t::loop_type_int8: 200*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_int8>(original_iv)); 201*06c3fb27SDimitry Andric break; 202*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint8: 203*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_uint8>(original_iv)); 204*06c3fb27SDimitry Andric break; 205*06c3fb27SDimitry Andric case loop_type_t::loop_type_int16: 206*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_int16>(original_iv)); 207*06c3fb27SDimitry Andric break; 208*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint16: 209*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_uint16>(original_iv)); 210*06c3fb27SDimitry Andric break; 211*06c3fb27SDimitry Andric case loop_type_t::loop_type_int32: 212*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_int32>(original_iv)); 213*06c3fb27SDimitry Andric break; 214*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 215*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_uint32>(original_iv)); 216*06c3fb27SDimitry Andric break; 
217*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 218*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(static_cast<kmp_int64>(original_iv)); 219*06c3fb27SDimitry Andric break; 220*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 221*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(original_iv); 222*06c3fb27SDimitry Andric break; 223*06c3fb27SDimitry Andric default: 224*06c3fb27SDimitry Andric KMP_ASSERT(false); 225*06c3fb27SDimitry Andric } 226*06c3fb27SDimitry Andric 227*06c3fb27SDimitry Andric return res; 228*06c3fb27SDimitry Andric } 229*06c3fb27SDimitry Andric 230*06c3fb27SDimitry Andric //----------Compare two IVs (remember they have a type)----------------------- 231*06c3fb27SDimitry Andric 232*06c3fb27SDimitry Andric bool kmp_ivs_eq(loop_type_t loop_iv_type, kmp_uint64 original_iv1, 233*06c3fb27SDimitry Andric kmp_uint64 original_iv2) { 234*06c3fb27SDimitry Andric bool res = false; 235*06c3fb27SDimitry Andric 236*06c3fb27SDimitry Andric switch (loop_iv_type) { 237*06c3fb27SDimitry Andric case loop_type_t::loop_type_int8: 238*06c3fb27SDimitry Andric res = static_cast<kmp_int8>(original_iv1) == 239*06c3fb27SDimitry Andric static_cast<kmp_int8>(original_iv2); 240*06c3fb27SDimitry Andric break; 241*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint8: 242*06c3fb27SDimitry Andric res = static_cast<kmp_uint8>(original_iv1) == 243*06c3fb27SDimitry Andric static_cast<kmp_uint8>(original_iv2); 244*06c3fb27SDimitry Andric break; 245*06c3fb27SDimitry Andric case loop_type_t::loop_type_int16: 246*06c3fb27SDimitry Andric res = static_cast<kmp_int16>(original_iv1) == 247*06c3fb27SDimitry Andric static_cast<kmp_int16>(original_iv2); 248*06c3fb27SDimitry Andric break; 249*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint16: 250*06c3fb27SDimitry Andric res = static_cast<kmp_uint16>(original_iv1) == 251*06c3fb27SDimitry Andric static_cast<kmp_uint16>(original_iv2); 252*06c3fb27SDimitry Andric break; 253*06c3fb27SDimitry Andric 
case loop_type_t::loop_type_int32: 254*06c3fb27SDimitry Andric res = static_cast<kmp_int32>(original_iv1) == 255*06c3fb27SDimitry Andric static_cast<kmp_int32>(original_iv2); 256*06c3fb27SDimitry Andric break; 257*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 258*06c3fb27SDimitry Andric res = static_cast<kmp_uint32>(original_iv1) == 259*06c3fb27SDimitry Andric static_cast<kmp_uint32>(original_iv2); 260*06c3fb27SDimitry Andric break; 261*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 262*06c3fb27SDimitry Andric res = static_cast<kmp_int64>(original_iv1) == 263*06c3fb27SDimitry Andric static_cast<kmp_int64>(original_iv2); 264*06c3fb27SDimitry Andric break; 265*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 266*06c3fb27SDimitry Andric res = static_cast<kmp_uint64>(original_iv1) == 267*06c3fb27SDimitry Andric static_cast<kmp_uint64>(original_iv2); 268*06c3fb27SDimitry Andric break; 269*06c3fb27SDimitry Andric default: 270*06c3fb27SDimitry Andric KMP_ASSERT(false); 271*06c3fb27SDimitry Andric } 272*06c3fb27SDimitry Andric 273*06c3fb27SDimitry Andric return res; 274*06c3fb27SDimitry Andric } 275*06c3fb27SDimitry Andric 276*06c3fb27SDimitry Andric //----------Calculate original iv on one level-------------------------------- 277*06c3fb27SDimitry Andric 278*06c3fb27SDimitry Andric // Return true if the point fits into upper bounds on this level, 279*06c3fb27SDimitry Andric // false otherwise 280*06c3fb27SDimitry Andric template <typename T> 281*06c3fb27SDimitry Andric bool kmp_iv_is_in_upper_bound_XX(const bounds_infoXX_template<T> *bounds, 282*06c3fb27SDimitry Andric const kmp_point_t original_ivs, 283*06c3fb27SDimitry Andric kmp_index_t ind) { 284*06c3fb27SDimitry Andric 285*06c3fb27SDimitry Andric T iv = static_cast<T>(original_ivs[ind]); 286*06c3fb27SDimitry Andric T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]); 287*06c3fb27SDimitry Andric 288*06c3fb27SDimitry Andric if (((bounds->comparison == 
comparison_t::comp_less_or_eq) && 289*06c3fb27SDimitry Andric (iv > (bounds->ub0 + bounds->ub1 * outer_iv))) || 290*06c3fb27SDimitry Andric ((bounds->comparison == comparison_t::comp_greater_or_eq) && 291*06c3fb27SDimitry Andric (iv < (bounds->ub0 + bounds->ub1 * outer_iv)))) { 292*06c3fb27SDimitry Andric // The calculated point is outside of loop upper boundary: 293*06c3fb27SDimitry Andric return false; 294*06c3fb27SDimitry Andric } 295*06c3fb27SDimitry Andric 296*06c3fb27SDimitry Andric return true; 297*06c3fb27SDimitry Andric } 298*06c3fb27SDimitry Andric 299*06c3fb27SDimitry Andric // Calculate one iv corresponding to iteration on the level ind. 300*06c3fb27SDimitry Andric // Return true if it fits into lower-upper bounds on this level 301*06c3fb27SDimitry Andric // (if not, we need to re-calculate) 302*06c3fb27SDimitry Andric template <typename T> 303*06c3fb27SDimitry Andric bool kmp_calc_one_iv_XX(const bounds_infoXX_template<T> *bounds, 304*06c3fb27SDimitry Andric /*in/out*/ kmp_point_t original_ivs, 305*06c3fb27SDimitry Andric const kmp_iterations_t iterations, kmp_index_t ind, 306*06c3fb27SDimitry Andric bool start_with_lower_bound, bool checkBounds) { 307*06c3fb27SDimitry Andric 308*06c3fb27SDimitry Andric kmp_uint64 temp = 0; 309*06c3fb27SDimitry Andric T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]); 310*06c3fb27SDimitry Andric 311*06c3fb27SDimitry Andric if (start_with_lower_bound) { 312*06c3fb27SDimitry Andric // we moved to the next iteration on one of outer loops, should start 313*06c3fb27SDimitry Andric // with the lower bound here: 314*06c3fb27SDimitry Andric temp = bounds->lb0 + bounds->lb1 * outer_iv; 315*06c3fb27SDimitry Andric } else { 316*06c3fb27SDimitry Andric auto iteration = iterations[ind]; 317*06c3fb27SDimitry Andric temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration * bounds->step; 318*06c3fb27SDimitry Andric } 319*06c3fb27SDimitry Andric 320*06c3fb27SDimitry Andric // Now trim original iv according to its type: 
321*06c3fb27SDimitry Andric original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp); 322*06c3fb27SDimitry Andric 323*06c3fb27SDimitry Andric if (checkBounds) { 324*06c3fb27SDimitry Andric return kmp_iv_is_in_upper_bound_XX(bounds, original_ivs, ind); 325*06c3fb27SDimitry Andric } else { 326*06c3fb27SDimitry Andric return true; 327*06c3fb27SDimitry Andric } 328*06c3fb27SDimitry Andric } 329*06c3fb27SDimitry Andric 330*06c3fb27SDimitry Andric bool kmp_calc_one_iv(const bounds_info_t *bounds, 331*06c3fb27SDimitry Andric /*in/out*/ kmp_point_t original_ivs, 332*06c3fb27SDimitry Andric const kmp_iterations_t iterations, kmp_index_t ind, 333*06c3fb27SDimitry Andric bool start_with_lower_bound, bool checkBounds) { 334*06c3fb27SDimitry Andric 335*06c3fb27SDimitry Andric switch (bounds->loop_type) { 336*06c3fb27SDimitry Andric case loop_type_t::loop_type_int32: 337*06c3fb27SDimitry Andric return kmp_calc_one_iv_XX<kmp_int32>( 338*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int32> *)(bounds), 339*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound, 340*06c3fb27SDimitry Andric checkBounds); 341*06c3fb27SDimitry Andric break; 342*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 343*06c3fb27SDimitry Andric return kmp_calc_one_iv_XX<kmp_uint32>( 344*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint32> *)(bounds), 345*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound, 346*06c3fb27SDimitry Andric checkBounds); 347*06c3fb27SDimitry Andric break; 348*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 349*06c3fb27SDimitry Andric return kmp_calc_one_iv_XX<kmp_int64>( 350*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int64> *)(bounds), 351*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound, 352*06c3fb27SDimitry Andric checkBounds); 353*06c3fb27SDimitry Andric break; 354*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 
355*06c3fb27SDimitry Andric return kmp_calc_one_iv_XX<kmp_uint64>( 356*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint64> *)(bounds), 357*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound, 358*06c3fb27SDimitry Andric checkBounds); 359*06c3fb27SDimitry Andric break; 360*06c3fb27SDimitry Andric default: 361*06c3fb27SDimitry Andric KMP_ASSERT(false); 362*06c3fb27SDimitry Andric return false; 363*06c3fb27SDimitry Andric } 364*06c3fb27SDimitry Andric } 365*06c3fb27SDimitry Andric 366*06c3fb27SDimitry Andric //----------Calculate original iv on one level for rectangular loop nest------ 367*06c3fb27SDimitry Andric 368*06c3fb27SDimitry Andric // Calculate one iv corresponding to iteration on the level ind. 369*06c3fb27SDimitry Andric // Return true if it fits into lower-upper bounds on this level 370*06c3fb27SDimitry Andric // (if not, we need to re-calculate) 371*06c3fb27SDimitry Andric template <typename T> 372*06c3fb27SDimitry Andric void kmp_calc_one_iv_rectang_XX(const bounds_infoXX_template<T> *bounds, 373*06c3fb27SDimitry Andric /*in/out*/ kmp_uint64 *original_ivs, 374*06c3fb27SDimitry Andric const kmp_iterations_t iterations, 375*06c3fb27SDimitry Andric kmp_index_t ind) { 376*06c3fb27SDimitry Andric 377*06c3fb27SDimitry Andric auto iteration = iterations[ind]; 378*06c3fb27SDimitry Andric 379*06c3fb27SDimitry Andric kmp_uint64 temp = 380*06c3fb27SDimitry Andric bounds->lb0 + 381*06c3fb27SDimitry Andric bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv]) + 382*06c3fb27SDimitry Andric iteration * bounds->step; 383*06c3fb27SDimitry Andric 384*06c3fb27SDimitry Andric // Now trim original iv according to its type: 385*06c3fb27SDimitry Andric original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp); 386*06c3fb27SDimitry Andric } 387*06c3fb27SDimitry Andric 388*06c3fb27SDimitry Andric void kmp_calc_one_iv_rectang(const bounds_info_t *bounds, 389*06c3fb27SDimitry Andric /*in/out*/ kmp_uint64 *original_ivs, 
390*06c3fb27SDimitry Andric const kmp_iterations_t iterations, 391*06c3fb27SDimitry Andric kmp_index_t ind) { 392*06c3fb27SDimitry Andric 393*06c3fb27SDimitry Andric switch (bounds->loop_type) { 394*06c3fb27SDimitry Andric case loop_type_t::loop_type_int32: 395*06c3fb27SDimitry Andric kmp_calc_one_iv_rectang_XX<kmp_int32>( 396*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int32> *)(bounds), 397*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind); 398*06c3fb27SDimitry Andric break; 399*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 400*06c3fb27SDimitry Andric kmp_calc_one_iv_rectang_XX<kmp_uint32>( 401*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint32> *)(bounds), 402*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind); 403*06c3fb27SDimitry Andric break; 404*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 405*06c3fb27SDimitry Andric kmp_calc_one_iv_rectang_XX<kmp_int64>( 406*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int64> *)(bounds), 407*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind); 408*06c3fb27SDimitry Andric break; 409*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 410*06c3fb27SDimitry Andric kmp_calc_one_iv_rectang_XX<kmp_uint64>( 411*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint64> *)(bounds), 412*06c3fb27SDimitry Andric /*in/out*/ original_ivs, iterations, ind); 413*06c3fb27SDimitry Andric break; 414*06c3fb27SDimitry Andric default: 415*06c3fb27SDimitry Andric KMP_ASSERT(false); 416*06c3fb27SDimitry Andric } 417*06c3fb27SDimitry Andric } 418*06c3fb27SDimitry Andric 419*06c3fb27SDimitry Andric //---------------------------------------------------------------------------- 420*06c3fb27SDimitry Andric // Rectangular loop nest 421*06c3fb27SDimitry Andric //---------------------------------------------------------------------------- 422*06c3fb27SDimitry Andric 423*06c3fb27SDimitry Andric //----------Canonicalize loop nest and calculate trip 
count------------------- 424*06c3fb27SDimitry Andric 425*06c3fb27SDimitry Andric // Canonicalize loop nest and calculate overall trip count. 426*06c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread. 427*06c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form 428*06c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a 429*06c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array 430*06c3fb27SDimitry Andric // (only trip counts will be calculated). 431*06c3fb27SDimitry Andric // Returns trip count of overall space. 432*06c3fb27SDimitry Andric extern "C" kmp_loop_nest_iv_t 433*06c3fb27SDimitry Andric __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, 434*06c3fb27SDimitry Andric /*in/out*/ bounds_info_t *original_bounds_nest, 435*06c3fb27SDimitry Andric kmp_index_t n) { 436*06c3fb27SDimitry Andric 437*06c3fb27SDimitry Andric kmp_canonicalize_loop_nest(loc, /*in/out*/ original_bounds_nest, n); 438*06c3fb27SDimitry Andric 439*06c3fb27SDimitry Andric kmp_loop_nest_iv_t total = 1; 440*06c3fb27SDimitry Andric 441*06c3fb27SDimitry Andric for (kmp_index_t ind = 0; ind < n; ++ind) { 442*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 443*06c3fb27SDimitry Andric 444*06c3fb27SDimitry Andric kmp_loop_nest_iv_t trip_count = kmp_calculate_trip_count(/*in/out*/ bounds); 445*06c3fb27SDimitry Andric total *= trip_count; 446*06c3fb27SDimitry Andric } 447*06c3fb27SDimitry Andric 448*06c3fb27SDimitry Andric return total; 449*06c3fb27SDimitry Andric } 450*06c3fb27SDimitry Andric 451*06c3fb27SDimitry Andric //----------Calculate old induction variables--------------------------------- 452*06c3fb27SDimitry Andric 453*06c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv. 
454*06c3fb27SDimitry Andric // Note: original IV will be returned as if it had kmp_uint64 type, 455*06c3fb27SDimitry Andric // will have to be converted to original type in user code. 456*06c3fb27SDimitry Andric // Note: trip counts should be already calculated by 457*06c3fb27SDimitry Andric // __kmpc_process_loop_nest_rectang. 458*06c3fb27SDimitry Andric // OMPTODO: special case 2, 3 nested loops: either do different 459*06c3fb27SDimitry Andric // interface without array or possibly template this over n 460*06c3fb27SDimitry Andric extern "C" void 461*06c3fb27SDimitry Andric __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, 462*06c3fb27SDimitry Andric const bounds_info_t *original_bounds_nest, 463*06c3fb27SDimitry Andric /*out*/ kmp_uint64 *original_ivs, 464*06c3fb27SDimitry Andric kmp_index_t n) { 465*06c3fb27SDimitry Andric 466*06c3fb27SDimitry Andric kmp_iterations_t iterations = 467*06c3fb27SDimitry Andric (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n); 468*06c3fb27SDimitry Andric 469*06c3fb27SDimitry Andric // First, calc corresponding iteration in every original loop: 470*06c3fb27SDimitry Andric for (kmp_index_t ind = n; ind > 0;) { 471*06c3fb27SDimitry Andric --ind; 472*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 473*06c3fb27SDimitry Andric 474*06c3fb27SDimitry Andric // should be optimized to OPDIVREM: 475*06c3fb27SDimitry Andric auto temp = new_iv / bounds->trip_count; 476*06c3fb27SDimitry Andric auto iteration = new_iv % bounds->trip_count; 477*06c3fb27SDimitry Andric new_iv = temp; 478*06c3fb27SDimitry Andric 479*06c3fb27SDimitry Andric iterations[ind] = iteration; 480*06c3fb27SDimitry Andric } 481*06c3fb27SDimitry Andric KMP_ASSERT(new_iv == 0); 482*06c3fb27SDimitry Andric 483*06c3fb27SDimitry Andric for (kmp_index_t ind = 0; ind < n; ++ind) { 484*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 485*06c3fb27SDimitry Andric 486*06c3fb27SDimitry Andric 
kmp_calc_one_iv_rectang(bounds, /*in/out*/ original_ivs, iterations, ind); 487*06c3fb27SDimitry Andric } 488*06c3fb27SDimitry Andric __kmp_free(iterations); 489*06c3fb27SDimitry Andric } 490*06c3fb27SDimitry Andric 491*06c3fb27SDimitry Andric //---------------------------------------------------------------------------- 492*06c3fb27SDimitry Andric // Non-rectangular loop nest 493*06c3fb27SDimitry Andric //---------------------------------------------------------------------------- 494*06c3fb27SDimitry Andric 495*06c3fb27SDimitry Andric //----------Calculate maximum possible span of iv values on one level--------- 496*06c3fb27SDimitry Andric 497*06c3fb27SDimitry Andric // Calculate span for IV on this loop level for "<=" case. 498*06c3fb27SDimitry Andric // Note: it's for <= on this loop nest level, so lower bound should be smallest 499*06c3fb27SDimitry Andric // value, upper bound should be the biggest value. If the loop won't execute, 500*06c3fb27SDimitry Andric // 'smallest' may be bigger than 'biggest', but we'd better not switch them 501*06c3fb27SDimitry Andric // around. 
502*06c3fb27SDimitry Andric template <typename T> 503*06c3fb27SDimitry Andric void kmp_calc_span_lessoreq_XX( 504*06c3fb27SDimitry Andric /* in/out*/ bounds_info_internalXX_template<T> *bounds, 505*06c3fb27SDimitry Andric /* in/out*/ bounds_info_internal_t *bounds_nest) { 506*06c3fb27SDimitry Andric 507*06c3fb27SDimitry Andric typedef typename traits_t<T>::unsigned_t UT; 508*06c3fb27SDimitry Andric // typedef typename traits_t<T>::signed_t ST; 509*06c3fb27SDimitry Andric 510*06c3fb27SDimitry Andric // typedef typename big_span_t span_t; 511*06c3fb27SDimitry Andric typedef T span_t; 512*06c3fb27SDimitry Andric 513*06c3fb27SDimitry Andric auto &bbounds = bounds->b; 514*06c3fb27SDimitry Andric 515*06c3fb27SDimitry Andric if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) { 516*06c3fb27SDimitry Andric // This dimention depends on one of previous ones; can't be the outermost 517*06c3fb27SDimitry Andric // one. 518*06c3fb27SDimitry Andric bounds_info_internalXX_template<T> *previous = 519*06c3fb27SDimitry Andric reinterpret_cast<bounds_info_internalXX_template<T> *>( 520*06c3fb27SDimitry Andric &(bounds_nest[bbounds.outer_iv])); 521*06c3fb27SDimitry Andric 522*06c3fb27SDimitry Andric // OMPTODO: assert that T is compatible with loop variable type on 523*06c3fb27SDimitry Andric // 'previous' loop 524*06c3fb27SDimitry Andric 525*06c3fb27SDimitry Andric { 526*06c3fb27SDimitry Andric span_t bound_candidate1 = 527*06c3fb27SDimitry Andric bbounds.lb0 + bbounds.lb1 * previous->span_smallest; 528*06c3fb27SDimitry Andric span_t bound_candidate2 = 529*06c3fb27SDimitry Andric bbounds.lb0 + bbounds.lb1 * previous->span_biggest; 530*06c3fb27SDimitry Andric if (bound_candidate1 < bound_candidate2) { 531*06c3fb27SDimitry Andric bounds->span_smallest = bound_candidate1; 532*06c3fb27SDimitry Andric } else { 533*06c3fb27SDimitry Andric bounds->span_smallest = bound_candidate2; 534*06c3fb27SDimitry Andric } 535*06c3fb27SDimitry Andric } 536*06c3fb27SDimitry Andric 537*06c3fb27SDimitry Andric { 
538*06c3fb27SDimitry Andric // We can't adjust the upper bound with respect to step, because 539*06c3fb27SDimitry Andric // lower bound might be off after adjustments 540*06c3fb27SDimitry Andric 541*06c3fb27SDimitry Andric span_t bound_candidate1 = 542*06c3fb27SDimitry Andric bbounds.ub0 + bbounds.ub1 * previous->span_smallest; 543*06c3fb27SDimitry Andric span_t bound_candidate2 = 544*06c3fb27SDimitry Andric bbounds.ub0 + bbounds.ub1 * previous->span_biggest; 545*06c3fb27SDimitry Andric if (bound_candidate1 < bound_candidate2) { 546*06c3fb27SDimitry Andric bounds->span_biggest = bound_candidate2; 547*06c3fb27SDimitry Andric } else { 548*06c3fb27SDimitry Andric bounds->span_biggest = bound_candidate1; 549*06c3fb27SDimitry Andric } 550*06c3fb27SDimitry Andric } 551*06c3fb27SDimitry Andric } else { 552*06c3fb27SDimitry Andric // Rectangular: 553*06c3fb27SDimitry Andric bounds->span_smallest = bbounds.lb0; 554*06c3fb27SDimitry Andric bounds->span_biggest = bbounds.ub0; 555*06c3fb27SDimitry Andric } 556*06c3fb27SDimitry Andric if (!bounds->loop_bounds_adjusted) { 557*06c3fb27SDimitry Andric // Here it's safe to reduce the space to the multiply of step. 558*06c3fb27SDimitry Andric // OMPTODO: check if the formular is correct. 559*06c3fb27SDimitry Andric // Also check if it would be safe to do this if we didn't adjust left side. 560*06c3fb27SDimitry Andric bounds->span_biggest -= 561*06c3fb27SDimitry Andric (static_cast<UT>(bbounds.ub0 - bbounds.lb0)) % bbounds.step; // abs? 562*06c3fb27SDimitry Andric } 563*06c3fb27SDimitry Andric } 564*06c3fb27SDimitry Andric 565*06c3fb27SDimitry Andric // Calculate span for IV on this loop level for ">=" case. 
566*06c3fb27SDimitry Andric template <typename T> 567*06c3fb27SDimitry Andric void kmp_calc_span_greateroreq_XX( 568*06c3fb27SDimitry Andric /* in/out*/ bounds_info_internalXX_template<T> *bounds, 569*06c3fb27SDimitry Andric /* in/out*/ bounds_info_internal_t *bounds_nest) { 570*06c3fb27SDimitry Andric 571*06c3fb27SDimitry Andric typedef typename traits_t<T>::unsigned_t UT; 572*06c3fb27SDimitry Andric // typedef typename traits_t<T>::signed_t ST; 573*06c3fb27SDimitry Andric 574*06c3fb27SDimitry Andric // typedef typename big_span_t span_t; 575*06c3fb27SDimitry Andric typedef T span_t; 576*06c3fb27SDimitry Andric 577*06c3fb27SDimitry Andric auto &bbounds = bounds->b; 578*06c3fb27SDimitry Andric 579*06c3fb27SDimitry Andric if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) { 580*06c3fb27SDimitry Andric // This dimention depends on one of previous ones; can't be the outermost 581*06c3fb27SDimitry Andric // one. 582*06c3fb27SDimitry Andric bounds_info_internalXX_template<T> *previous = 583*06c3fb27SDimitry Andric reinterpret_cast<bounds_info_internalXX_template<T> *>( 584*06c3fb27SDimitry Andric &(bounds_nest[bbounds.outer_iv])); 585*06c3fb27SDimitry Andric 586*06c3fb27SDimitry Andric // OMPTODO: assert that T is compatible with loop variable type on 587*06c3fb27SDimitry Andric // 'previous' loop 588*06c3fb27SDimitry Andric 589*06c3fb27SDimitry Andric { 590*06c3fb27SDimitry Andric span_t bound_candidate1 = 591*06c3fb27SDimitry Andric bbounds.lb0 + bbounds.lb1 * previous->span_smallest; 592*06c3fb27SDimitry Andric span_t bound_candidate2 = 593*06c3fb27SDimitry Andric bbounds.lb0 + bbounds.lb1 * previous->span_biggest; 594*06c3fb27SDimitry Andric if (bound_candidate1 >= bound_candidate2) { 595*06c3fb27SDimitry Andric bounds->span_smallest = bound_candidate1; 596*06c3fb27SDimitry Andric } else { 597*06c3fb27SDimitry Andric bounds->span_smallest = bound_candidate2; 598*06c3fb27SDimitry Andric } 599*06c3fb27SDimitry Andric } 600*06c3fb27SDimitry Andric 601*06c3fb27SDimitry 
Andric { 602*06c3fb27SDimitry Andric // We can't adjust the upper bound with respect to step, because 603*06c3fb27SDimitry Andric // lower bound might be off after adjustments 604*06c3fb27SDimitry Andric 605*06c3fb27SDimitry Andric span_t bound_candidate1 = 606*06c3fb27SDimitry Andric bbounds.ub0 + bbounds.ub1 * previous->span_smallest; 607*06c3fb27SDimitry Andric span_t bound_candidate2 = 608*06c3fb27SDimitry Andric bbounds.ub0 + bbounds.ub1 * previous->span_biggest; 609*06c3fb27SDimitry Andric if (bound_candidate1 >= bound_candidate2) { 610*06c3fb27SDimitry Andric bounds->span_biggest = bound_candidate2; 611*06c3fb27SDimitry Andric } else { 612*06c3fb27SDimitry Andric bounds->span_biggest = bound_candidate1; 613*06c3fb27SDimitry Andric } 614*06c3fb27SDimitry Andric } 615*06c3fb27SDimitry Andric 616*06c3fb27SDimitry Andric } else { 617*06c3fb27SDimitry Andric // Rectangular: 618*06c3fb27SDimitry Andric bounds->span_biggest = bbounds.lb0; 619*06c3fb27SDimitry Andric bounds->span_smallest = bbounds.ub0; 620*06c3fb27SDimitry Andric } 621*06c3fb27SDimitry Andric if (!bounds->loop_bounds_adjusted) { 622*06c3fb27SDimitry Andric // Here it's safe to reduce the space to the multiply of step. 623*06c3fb27SDimitry Andric // OMPTODO: check if the formular is correct. 624*06c3fb27SDimitry Andric // Also check if it would be safe to do this if we didn't adjust left side. 625*06c3fb27SDimitry Andric bounds->span_biggest -= 626*06c3fb27SDimitry Andric (static_cast<UT>(bbounds.ub0 - bbounds.lb0)) % bbounds.step; // abs? 627*06c3fb27SDimitry Andric } 628*06c3fb27SDimitry Andric } 629*06c3fb27SDimitry Andric 630*06c3fb27SDimitry Andric // Calculate maximum possible span for IV on this loop level. 
631*06c3fb27SDimitry Andric template <typename T> 632*06c3fb27SDimitry Andric void kmp_calc_span_XX( 633*06c3fb27SDimitry Andric /* in/out*/ bounds_info_internalXX_template<T> *bounds, 634*06c3fb27SDimitry Andric /* in/out*/ bounds_info_internal_t *bounds_nest) { 635*06c3fb27SDimitry Andric 636*06c3fb27SDimitry Andric if (bounds->b.comparison == comparison_t::comp_less_or_eq) { 637*06c3fb27SDimitry Andric kmp_calc_span_lessoreq_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest); 638*06c3fb27SDimitry Andric } else { 639*06c3fb27SDimitry Andric KMP_ASSERT(bounds->b.comparison == comparison_t::comp_greater_or_eq); 640*06c3fb27SDimitry Andric kmp_calc_span_greateroreq_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest); 641*06c3fb27SDimitry Andric } 642*06c3fb27SDimitry Andric } 643*06c3fb27SDimitry Andric 644*06c3fb27SDimitry Andric //----------All initial processing of the loop nest--------------------------- 645*06c3fb27SDimitry Andric 646*06c3fb27SDimitry Andric // Calculate new bounds for this loop level. 647*06c3fb27SDimitry Andric // To be able to work with the nest we need to get it to a parallelepiped shape. 648*06c3fb27SDimitry Andric // We need to stay in the original range of values, so that there will be no 649*06c3fb27SDimitry Andric // overflow, for that we'll adjust both upper and lower bounds as needed. 
template <typename T>
void kmp_calc_new_bounds_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /* in/out*/ bounds_info_internal_t *bounds_nest) {

  auto &bbounds = bounds->b;

  if (bbounds.lb1 == bbounds.ub1) {
    // Already parallel, no need to adjust:
    bounds->loop_bounds_adjusted = false;
  } else {
    bounds->loop_bounds_adjusted = true;

    T old_lb1 = bbounds.lb1;
    T old_ub1 = bbounds.ub1;

    if (__kmp_sign(old_lb1) != __kmp_sign(old_ub1)) {
      // Coefficients have opposite signs (or one is zero): with this shape we
      // can adjust to a rectangle by dropping the outer-IV dependence:
      bbounds.lb1 = 0;
      bbounds.ub1 = 0;
    } else {
      // Get upper and lower bounds to be parallel
      // with values in the old range.
      // Pick whichever coefficient keeps the bound inside the original range.
      // Note: abs didn't work here.
      if (((old_lb1 < 0) && (old_lb1 < old_ub1)) ||
          ((old_lb1 > 0) && (old_lb1 > old_ub1))) {
        bbounds.lb1 = old_ub1;
      } else {
        bbounds.ub1 = old_lb1;
      }
    }

    // Now need to adjust lb0, ub0, otherwise in some cases space will shrink.
    // The idea here is that for this IV we are now getting the same span
    // irrespective of the previous IV value.
    bounds_info_internalXX_template<T> *previous =
        reinterpret_cast<bounds_info_internalXX_template<T> *>(
            &bounds_nest[bbounds.outer_iv]);

    if (bbounds.comparison == comparison_t::comp_less_or_eq) {
      if (old_lb1 < bbounds.lb1) {
        KMP_ASSERT(old_lb1 < 0);
        // The length is good on outer_iv biggest number,
        // can use it to find where to move the lower bound:

        T sub = (bbounds.lb1 - old_lb1) * previous->span_biggest;
        bbounds.lb0 -= sub; // OMPTODO: what if it'll go out of unsigned space?
                            // e.g. it was 0?? (same below)
      } else if (old_lb1 > bbounds.lb1) {
        // still need to move lower bound:
        T add = (old_lb1 - bbounds.lb1) * previous->span_smallest;
        bbounds.lb0 += add;
      }

      if (old_ub1 > bbounds.ub1) {
        KMP_ASSERT(old_ub1 > 0);
        // The length is good on outer_iv biggest number,
        // can use it to find where to move upper bound:

        T add = (old_ub1 - bbounds.ub1) * previous->span_biggest;
        bbounds.ub0 += add;
      } else if (old_ub1 < bbounds.ub1) {
        // still need to move upper bound:
        T sub = (bbounds.ub1 - old_ub1) * previous->span_smallest;
        bbounds.ub0 -= sub;
      }
    } else {
      // Mirror of the "<=" case above, using the opposite span extremes
      // because the loop runs downwards:
      KMP_ASSERT(bbounds.comparison == comparison_t::comp_greater_or_eq);
      if (old_lb1 < bbounds.lb1) {
        KMP_ASSERT(old_lb1 < 0);
        T sub = (bbounds.lb1 - old_lb1) * previous->span_smallest;
        bbounds.lb0 -= sub;
      } else if (old_lb1 > bbounds.lb1) {
        T add = (old_lb1 - bbounds.lb1) * previous->span_biggest;
        bbounds.lb0 += add;
      }

      if (old_ub1 > bbounds.ub1) {
        KMP_ASSERT(old_ub1 > 0);
        T add = (old_ub1 - bbounds.ub1) * previous->span_smallest;
        bbounds.ub0 += add;
      } else if (old_ub1 < bbounds.ub1) {
        T sub = (bbounds.ub1 - old_ub1) * previous->span_biggest;
        bbounds.ub0 -= sub;
      }
    }
  }
}

// Do all processing for one canonicalized loop in the nest
// (assuming that outer loops already were processed):
// adjust bounds, compute spans, and return the trip count for this level.
template <typename T>
kmp_loop_nest_iv_t kmp_process_one_loop_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {

  kmp_calc_new_bounds_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest);
  kmp_calc_span_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest);
  return kmp_calculate_trip_count_XX(/*in/out*/ &(bounds->b));
}

// Non-rectangular loop nest, canonicalized to use <= or >=.
// Process loop nest to have a parallelepiped shape,
// calculate biggest spans for IV's on all levels and calculate overall trip
// count. "bounds_nest" has to be allocated per thread.
// Returns overall trip count (for adjusted space).
kmp_loop_nest_iv_t kmp_process_loop_nest(
    /*in/out*/ bounds_info_internal_t *bounds_nest, kmp_index_t n) {

  kmp_loop_nest_iv_t total = 1;

  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(bounds_nest[ind]);
    kmp_loop_nest_iv_t trip_count = 0;

    // Dispatch on the IV type of this loop level:
    switch (bounds->b.loop_type) {
    case loop_type_t::loop_type_int32:
      trip_count = kmp_process_one_loop_XX<kmp_int32>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_int32> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    case loop_type_t::loop_type_uint32:
      trip_count = kmp_process_one_loop_XX<kmp_uint32>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_uint32> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    case loop_type_t::loop_type_int64:
      trip_count = kmp_process_one_loop_XX<kmp_int64>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_int64> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    case loop_type_t::loop_type_uint64:
      trip_count = kmp_process_one_loop_XX<kmp_uint64>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_uint64> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    default:
      KMP_ASSERT(false);
    }
    // Overall trip count is the product of per-level trip counts:
    total *= trip_count;
  }

  return total;
}

//----------Calculate iterations (in the original or updated space)-----------

// Calculate number of iterations in original or updated space resulting in
// original_ivs[ind] (only on this level, non-negative)
// (not counting initial iteration)
template <typename T>
kmp_loop_nest_iv_t
kmp_calc_number_of_iterations_XX(const bounds_infoXX_template<T> *bounds,
                                 const kmp_point_t original_ivs,
                                 kmp_index_t ind) {

  kmp_loop_nest_iv_t iterations = 0;

  if (bounds->comparison == comparison_t::comp_less_or_eq) {
    // Distance from the (outer-IV dependent) lower bound, in steps:
    iterations =
        (static_cast<T>(original_ivs[ind]) - bounds->lb0 -
         bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv])) /
        __kmp_abs(bounds->step);
  } else {
    KMP_DEBUG_ASSERT(bounds->comparison == comparison_t::comp_greater_or_eq);
    // Loop runs downwards, so the distance is measured from above:
    iterations = (bounds->lb0 +
                  bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv]) -
                  static_cast<T>(original_ivs[ind])) /
                 __kmp_abs(bounds->step);
  }

  return iterations;
}

// Calculate number of iterations in the original or updated space resulting in
// original_ivs[ind] (only on this level, non-negative)
kmp_loop_nest_iv_t kmp_calc_number_of_iterations(const bounds_info_t *bounds,
                                                 const kmp_point_t original_ivs,
                                                 kmp_index_t ind) {

  // Dispatch on the IV type of this loop level:
  switch (bounds->loop_type) {
  case loop_type_t::loop_type_int32:
    return kmp_calc_number_of_iterations_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds), original_ivs, ind);
    break;
  case loop_type_t::loop_type_uint32:
    return kmp_calc_number_of_iterations_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds), original_ivs, ind);
    break;
  case loop_type_t::loop_type_int64:
    return kmp_calc_number_of_iterations_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds), original_ivs, ind);
    break;
  case loop_type_t::loop_type_uint64:
    return kmp_calc_number_of_iterations_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds), original_ivs, ind);
    break;
  default:
    KMP_ASSERT(false);
    return 0;
  }
}

//----------Calculate new iv corresponding to original ivs--------------------

// We got a point in the original loop nest.
// Take updated bounds and calculate what new_iv will correspond to this point.
// When we are getting original IVs from new_iv, we have to adjust to fit into
// original loops bounds. Getting new_iv for the adjusted original IVs will help
// with making more chunks non-empty.
kmp_loop_nest_iv_t
kmp_calc_new_iv_from_original_ivs(const bounds_info_internal_t *bounds_nest,
                                  const kmp_point_t original_ivs,
                                  kmp_index_t n) {

  kmp_loop_nest_iv_t new_iv = 0;

  // Mixed-radix accumulation: each level contributes its iteration number
  // scaled by the trip counts of all inner levels.
  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(bounds_nest[ind].b);

    new_iv = new_iv * bounds->trip_count +
             kmp_calc_number_of_iterations(bounds, original_ivs, ind);
  }

  return new_iv;
}

//----------Calculate original ivs for provided iterations--------------------

// Calculate original IVs for provided iterations, assuming iterations are
// calculated in the original space.
// Loop nest is in canonical form (with <= / >=).
882*06c3fb27SDimitry Andric bool kmp_calc_original_ivs_from_iterations( 883*06c3fb27SDimitry Andric const bounds_info_t *original_bounds_nest, kmp_index_t n, 884*06c3fb27SDimitry Andric /*in/out*/ kmp_point_t original_ivs, 885*06c3fb27SDimitry Andric /*in/out*/ kmp_iterations_t iterations, kmp_index_t ind) { 886*06c3fb27SDimitry Andric 887*06c3fb27SDimitry Andric kmp_index_t lengthened_ind = n; 888*06c3fb27SDimitry Andric 889*06c3fb27SDimitry Andric for (; ind < n;) { 890*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 891*06c3fb27SDimitry Andric bool good = kmp_calc_one_iv(bounds, /*in/out*/ original_ivs, iterations, 892*06c3fb27SDimitry Andric ind, (lengthened_ind < ind), true); 893*06c3fb27SDimitry Andric 894*06c3fb27SDimitry Andric if (!good) { 895*06c3fb27SDimitry Andric // The calculated iv value is too big (or too small for >=): 896*06c3fb27SDimitry Andric if (ind == 0) { 897*06c3fb27SDimitry Andric // Space is empty: 898*06c3fb27SDimitry Andric return false; 899*06c3fb27SDimitry Andric } else { 900*06c3fb27SDimitry Andric // Go to next iteration on the outer loop: 901*06c3fb27SDimitry Andric --ind; 902*06c3fb27SDimitry Andric ++iterations[ind]; 903*06c3fb27SDimitry Andric lengthened_ind = ind; 904*06c3fb27SDimitry Andric for (kmp_index_t i = ind + 1; i < n; ++i) { 905*06c3fb27SDimitry Andric iterations[i] = 0; 906*06c3fb27SDimitry Andric } 907*06c3fb27SDimitry Andric continue; 908*06c3fb27SDimitry Andric } 909*06c3fb27SDimitry Andric } 910*06c3fb27SDimitry Andric ++ind; 911*06c3fb27SDimitry Andric } 912*06c3fb27SDimitry Andric 913*06c3fb27SDimitry Andric return true; 914*06c3fb27SDimitry Andric } 915*06c3fb27SDimitry Andric 916*06c3fb27SDimitry Andric //----------Calculate original ivs for the beginning of the loop nest--------- 917*06c3fb27SDimitry Andric 918*06c3fb27SDimitry Andric // Calculate IVs for the beginning of the loop nest. 
919*06c3fb27SDimitry Andric // Note: lower bounds of all loops may not work - 920*06c3fb27SDimitry Andric // if on some of the iterations of the outer loops inner loops are empty. 921*06c3fb27SDimitry Andric // Loop nest is in canonical form (with <= / >=). 922*06c3fb27SDimitry Andric bool kmp_calc_original_ivs_for_start(const bounds_info_t *original_bounds_nest, 923*06c3fb27SDimitry Andric kmp_index_t n, 924*06c3fb27SDimitry Andric /*out*/ kmp_point_t original_ivs) { 925*06c3fb27SDimitry Andric 926*06c3fb27SDimitry Andric // Iterations in the original space, multiplied by step: 927*06c3fb27SDimitry Andric kmp_iterations_t iterations = 928*06c3fb27SDimitry Andric (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n); 929*06c3fb27SDimitry Andric 930*06c3fb27SDimitry Andric for (kmp_index_t ind = n; ind > 0;) { 931*06c3fb27SDimitry Andric --ind; 932*06c3fb27SDimitry Andric iterations[ind] = 0; 933*06c3fb27SDimitry Andric } 934*06c3fb27SDimitry Andric 935*06c3fb27SDimitry Andric // Now calculate the point: 936*06c3fb27SDimitry Andric bool b = kmp_calc_original_ivs_from_iterations(original_bounds_nest, n, 937*06c3fb27SDimitry Andric /*in/out*/ original_ivs, 938*06c3fb27SDimitry Andric /*in/out*/ iterations, 0); 939*06c3fb27SDimitry Andric __kmp_free(iterations); 940*06c3fb27SDimitry Andric return b; 941*06c3fb27SDimitry Andric } 942*06c3fb27SDimitry Andric 943*06c3fb27SDimitry Andric //----------Calculate next point in the original loop space------------------- 944*06c3fb27SDimitry Andric 945*06c3fb27SDimitry Andric // From current set of original IVs calculate next point. 946*06c3fb27SDimitry Andric // Return false if there is no next point in the loop bounds. 
947*06c3fb27SDimitry Andric bool kmp_calc_next_original_ivs(const bounds_info_t *original_bounds_nest, 948*06c3fb27SDimitry Andric kmp_index_t n, const kmp_point_t original_ivs, 949*06c3fb27SDimitry Andric /*out*/ kmp_point_t next_original_ivs) { 950*06c3fb27SDimitry Andric // Iterations in the original space, multiplied by step (so can be negative): 951*06c3fb27SDimitry Andric kmp_iterations_t iterations = 952*06c3fb27SDimitry Andric (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n); 953*06c3fb27SDimitry Andric 954*06c3fb27SDimitry Andric // First, calc corresponding iteration in every original loop: 955*06c3fb27SDimitry Andric for (kmp_index_t ind = 0; ind < n; ++ind) { 956*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 957*06c3fb27SDimitry Andric iterations[ind] = kmp_calc_number_of_iterations(bounds, original_ivs, ind); 958*06c3fb27SDimitry Andric } 959*06c3fb27SDimitry Andric 960*06c3fb27SDimitry Andric for (kmp_index_t ind = 0; ind < n; ++ind) { 961*06c3fb27SDimitry Andric next_original_ivs[ind] = original_ivs[ind]; 962*06c3fb27SDimitry Andric } 963*06c3fb27SDimitry Andric 964*06c3fb27SDimitry Andric // Next add one step to the iterations on the inner-most level, and see if we 965*06c3fb27SDimitry Andric // need to move up the nest: 966*06c3fb27SDimitry Andric kmp_index_t ind = n - 1; 967*06c3fb27SDimitry Andric ++iterations[ind]; 968*06c3fb27SDimitry Andric 969*06c3fb27SDimitry Andric bool b = kmp_calc_original_ivs_from_iterations( 970*06c3fb27SDimitry Andric original_bounds_nest, n, /*in/out*/ next_original_ivs, iterations, ind); 971*06c3fb27SDimitry Andric 972*06c3fb27SDimitry Andric __kmp_free(iterations); 973*06c3fb27SDimitry Andric return b; 974*06c3fb27SDimitry Andric } 975*06c3fb27SDimitry Andric 976*06c3fb27SDimitry Andric //----------Calculate chunk end in the original loop space-------------------- 977*06c3fb27SDimitry Andric 978*06c3fb27SDimitry Andric // For one level calculate old induction variable 
// For one level calculate old induction variable corresponding to overall
// new_iv for the chunk end.
// Return true if it fits into upper bound on this level
// (if not, we need to re-calculate)
template <typename T>
bool kmp_calc_one_iv_for_chunk_end_XX(
    const bounds_infoXX_template<T> *bounds,
    const bounds_infoXX_template<T> *updated_bounds,
    /*in/out*/ kmp_point_t original_ivs, const kmp_iterations_t iterations,
    kmp_index_t ind, bool start_with_lower_bound, bool compare_with_start,
    const kmp_point_t original_ivs_start) {

  // typedef std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
  // big_span_t;

  // OMPTODO: is it good enough, or do we need ST or do we need big_span_t?
  T temp = 0;

  T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);

  if (start_with_lower_bound) {
    // we moved to the next iteration on one of outer loops, may as well use
    // the lower bound here:
    temp = bounds->lb0 + bounds->lb1 * outer_iv;
  } else {
    // Start in expanded space, but:
    // - we need to hit original space lower bound, so need to account for
    // that
    // - we have to go into original space, even if that means adding more
    // iterations than was planned
    // - we have to go past (or equal to) previous point (which is the chunk
    // starting point)

    auto iteration = iterations[ind];

    auto step = bounds->step;

    // Offset between the original and the updated lower bounds, reduced
    // modulo step so we land on an actual iteration point.
    // In case of >= it's negative:
    auto accountForStep =
        ((bounds->lb0 + bounds->lb1 * outer_iv) -
         (updated_bounds->lb0 + updated_bounds->lb1 * outer_iv)) %
        step;

    temp = updated_bounds->lb0 + updated_bounds->lb1 * outer_iv +
           accountForStep + iteration * step;

    if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
         (temp < (bounds->lb0 + bounds->lb1 * outer_iv))) ||
        ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
         (temp > (bounds->lb0 + bounds->lb1 * outer_iv)))) {
      // Too small (or too big), didn't reach the original lower bound. Use
      // heuristic (half the planned iterations past the original lower
      // bound; an exact ending point isn't required here):
      temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration / 2 * step;
    }

    if (compare_with_start) {

      T start = static_cast<T>(original_ivs_start[ind]);

      temp = kmp_fix_iv(bounds->loop_iv_type, temp);

      // On all previous levels start of the chunk is same as the end, need to
      // be really careful here:
      if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
           (temp < start)) ||
          ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
           (temp > start))) {
        // End of the chunk can't be smaller (for >= bigger) than its start.
        // Use heuristic:
        temp = start + iteration / 4 * step;
      }
    }
  }

  // Wrap the candidate into the IV's declared type before publishing it:
  original_ivs[ind] = temp = kmp_fix_iv(bounds->loop_iv_type, temp);

  if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
       (temp > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
      ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
       (temp < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
    // Too big (or too small for >=).
    return false;
  }

  return true;
}

// For one level calculate old induction variable corresponding to overall
// new_iv for the chunk end.
1067*06c3fb27SDimitry Andric bool kmp_calc_one_iv_for_chunk_end(const bounds_info_t *bounds, 1068*06c3fb27SDimitry Andric const bounds_info_t *updated_bounds, 1069*06c3fb27SDimitry Andric /*in/out*/ kmp_point_t original_ivs, 1070*06c3fb27SDimitry Andric const kmp_iterations_t iterations, 1071*06c3fb27SDimitry Andric kmp_index_t ind, bool start_with_lower_bound, 1072*06c3fb27SDimitry Andric bool compare_with_start, 1073*06c3fb27SDimitry Andric const kmp_point_t original_ivs_start) { 1074*06c3fb27SDimitry Andric 1075*06c3fb27SDimitry Andric switch (bounds->loop_type) { 1076*06c3fb27SDimitry Andric case loop_type_t::loop_type_int32: 1077*06c3fb27SDimitry Andric return kmp_calc_one_iv_for_chunk_end_XX<kmp_int32>( 1078*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int32> *)(bounds), 1079*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int32> *)(updated_bounds), 1080*06c3fb27SDimitry Andric /*in/out*/ 1081*06c3fb27SDimitry Andric original_ivs, iterations, ind, start_with_lower_bound, 1082*06c3fb27SDimitry Andric compare_with_start, original_ivs_start); 1083*06c3fb27SDimitry Andric break; 1084*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint32: 1085*06c3fb27SDimitry Andric return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint32>( 1086*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint32> *)(bounds), 1087*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint32> *)(updated_bounds), 1088*06c3fb27SDimitry Andric /*in/out*/ 1089*06c3fb27SDimitry Andric original_ivs, iterations, ind, start_with_lower_bound, 1090*06c3fb27SDimitry Andric compare_with_start, original_ivs_start); 1091*06c3fb27SDimitry Andric break; 1092*06c3fb27SDimitry Andric case loop_type_t::loop_type_int64: 1093*06c3fb27SDimitry Andric return kmp_calc_one_iv_for_chunk_end_XX<kmp_int64>( 1094*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int64> *)(bounds), 1095*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_int64> *)(updated_bounds), 1096*06c3fb27SDimitry Andric /*in/out*/ 
1097*06c3fb27SDimitry Andric original_ivs, iterations, ind, start_with_lower_bound, 1098*06c3fb27SDimitry Andric compare_with_start, original_ivs_start); 1099*06c3fb27SDimitry Andric break; 1100*06c3fb27SDimitry Andric case loop_type_t::loop_type_uint64: 1101*06c3fb27SDimitry Andric return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint64>( 1102*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint64> *)(bounds), 1103*06c3fb27SDimitry Andric (bounds_infoXX_template<kmp_uint64> *)(updated_bounds), 1104*06c3fb27SDimitry Andric /*in/out*/ 1105*06c3fb27SDimitry Andric original_ivs, iterations, ind, start_with_lower_bound, 1106*06c3fb27SDimitry Andric compare_with_start, original_ivs_start); 1107*06c3fb27SDimitry Andric break; 1108*06c3fb27SDimitry Andric default: 1109*06c3fb27SDimitry Andric KMP_ASSERT(false); 1110*06c3fb27SDimitry Andric return false; 1111*06c3fb27SDimitry Andric } 1112*06c3fb27SDimitry Andric } 1113*06c3fb27SDimitry Andric 1114*06c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv for the 1115*06c3fb27SDimitry Andric // chunk end. If due to space extension we are getting old IVs outside of the 1116*06c3fb27SDimitry Andric // boundaries, bring them into the boundaries. Need to do this in the runtime, 1117*06c3fb27SDimitry Andric // esp. on the lower bounds side. When getting result need to make sure that the 1118*06c3fb27SDimitry Andric // new chunk starts at next position to old chunk, not overlaps with it (this is 1119*06c3fb27SDimitry Andric // done elsewhere), and need to make sure end of the chunk is further than the 1120*06c3fb27SDimitry Andric // beginning of the chunk. We don't need an exact ending point here, just 1121*06c3fb27SDimitry Andric // something more-or-less close to the desired chunk length, bigger is fine 1122*06c3fb27SDimitry Andric // (smaller would be fine, but we risk going into infinite loop, so do smaller 1123*06c3fb27SDimitry Andric // only at the very end of the space). 
// result: false if could not find the
// ending point in the original loop space. In this case the caller can use
// original upper bounds as the end of the chunk. Chunk won't be empty, because
// it'll have at least the starting point, which is by construction in the
// original space.
bool kmp_calc_original_ivs_for_chunk_end(
    const bounds_info_t *original_bounds_nest, kmp_index_t n,
    const bounds_info_internal_t *updated_bounds_nest,
    const kmp_point_t original_ivs_start, kmp_loop_nest_iv_t new_iv,
    /*out*/ kmp_point_t original_ivs) {

  // Iterations in the expanded space:
  kmp_iterations_t iterations =
      (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n);

  // First, calc corresponding iteration in every modified loop:
  // decompose the linear IV into mixed-radix digits, innermost loop first
  // (radix of level "ind" is that level's trip count in the expanded space).
  for (kmp_index_t ind = n; ind > 0;) {
    --ind;
    auto &updated_bounds = updated_bounds_nest[ind];

    // should be optimized to OPDIVREM:
    auto new_ind = new_iv / updated_bounds.b.trip_count;
    auto iteration = new_iv % updated_bounds.b.trip_count;

    new_iv = new_ind;
    iterations[ind] = iteration;
  }
  // new_iv was < total, so all of it must have been consumed by the division:
  KMP_DEBUG_ASSERT(new_iv == 0);

  // lengthened_ind: outermost level whose iteration count was bumped while
  // backtracking; levels inner to it restart from their lower bounds.
  // equal_ind: length of the prefix of original_ivs that (so far) equals
  // original_ivs_start; -1 means "no levels known equal".
  kmp_index_t lengthened_ind = n;
  kmp_index_t equal_ind = -1;

  // Next calculate the point, but in original loop nest.
  for (kmp_index_t ind = 0; ind < n;) {
    auto bounds = &(original_bounds_nest[ind]);
    auto updated_bounds = &(updated_bounds_nest[ind].b);

    bool good = kmp_calc_one_iv_for_chunk_end(
        bounds, updated_bounds,
        /*in/out*/ original_ivs, iterations, ind, (lengthened_ind < ind),
        (equal_ind >= ind - 1), original_ivs_start);

    if (!good) {
      // Too big (or too small for >=).
      if (ind == 0) {
        // Need to reduce to the end.
        __kmp_free(iterations);
        return false;
      } else {
        // Go to next iteration on outer loop:
        --ind;
        ++(iterations[ind]);
        lengthened_ind = ind;
        if (equal_ind >= lengthened_ind) {
          // We've changed the number of iterations here,
          // can't be same anymore:
          equal_ind = lengthened_ind - 1;
        }
        // Inner levels restart from scratch after the bump:
        for (kmp_index_t i = ind + 1; i < n; ++i) {
          iterations[i] = 0;
        }
        continue;
      }
    }

    // Maintain equal_ind: extend the equal prefix if this level matched the
    // start point, shrink it if a previously-equal level now differs.
    if ((equal_ind == ind - 1) &&
        (kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
                    original_ivs_start[ind]))) {
      equal_ind = ind;
    } else if ((equal_ind > ind - 1) &&
               !(kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
                            original_ivs_start[ind]))) {
      equal_ind = ind - 1;
    }
    ++ind;
  }

  __kmp_free(iterations);
  return true;
}

//----------Calculate upper bounds for the last chunk-------------------------

// Calculate one upper bound for the end.
// Evaluates the (possibly outer-IV-dependent) upper bound ub0 + ub1 * outer_iv
// and stores it, truncated to the loop's IV type, into original_ivs[ind].
template <typename T>
void kmp_calc_one_iv_end_XX(const bounds_infoXX_template<T> *bounds,
                            /*in/out*/ kmp_point_t original_ivs,
                            kmp_index_t ind) {

  T temp = bounds->ub0 +
           bounds->ub1 * static_cast<T>(original_ivs[bounds->outer_iv]);

  original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
}

// Type-dispatch wrapper over kmp_calc_one_iv_end_XX.
void kmp_calc_one_iv_end(const bounds_info_t *bounds,
                         /*in/out*/ kmp_point_t original_ivs, kmp_index_t ind) {

  switch (bounds->loop_type) {
  default:
    KMP_ASSERT(false);
    break;
  case loop_type_t::loop_type_int32:
    kmp_calc_one_iv_end_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  case loop_type_t::loop_type_uint32:
    kmp_calc_one_iv_end_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  case loop_type_t::loop_type_int64:
    kmp_calc_one_iv_end_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  case loop_type_t::loop_type_uint64:
    kmp_calc_one_iv_end_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  }
}

// Calculate upper bounds for the last loop iteration. Just use original upper
// bounds (adjusted when canonicalized to use <= / >=).
No need to check that 1250*06c3fb27SDimitry Andric // this point is in the original space (it's likely not) 1251*06c3fb27SDimitry Andric void kmp_calc_original_ivs_for_end( 1252*06c3fb27SDimitry Andric const bounds_info_t *const original_bounds_nest, kmp_index_t n, 1253*06c3fb27SDimitry Andric /*out*/ kmp_point_t original_ivs) { 1254*06c3fb27SDimitry Andric for (kmp_index_t ind = 0; ind < n; ++ind) { 1255*06c3fb27SDimitry Andric auto bounds = &(original_bounds_nest[ind]); 1256*06c3fb27SDimitry Andric kmp_calc_one_iv_end(bounds, /*in/out*/ original_ivs, ind); 1257*06c3fb27SDimitry Andric } 1258*06c3fb27SDimitry Andric } 1259*06c3fb27SDimitry Andric 1260*06c3fb27SDimitry Andric //----------Init API for non-rectangular loops-------------------------------- 1261*06c3fb27SDimitry Andric 1262*06c3fb27SDimitry Andric // Init API for collapsed loops (static, no chunks defined). 1263*06c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread. 1264*06c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form 1265*06c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a 1266*06c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array 1267*06c3fb27SDimitry Andric // (only trip counts will be calculated). Internally API will expand the space 1268*06c3fb27SDimitry Andric // to parallelogram/parallelepiped, calculate total, calculate bounds for the 1269*06c3fb27SDimitry Andric // chunks in terms of the new IV, re-calc them in terms of old IVs (especially 1270*06c3fb27SDimitry Andric // important on the left side, to hit the lower bounds and not step over), and 1271*06c3fb27SDimitry Andric // pick the correct chunk for this thread (so it will calculate chunks up to the 1272*06c3fb27SDimitry Andric // needed one). 
// It could be optimized to calculate just this chunk, potentially
// a bit less well distributed among threads. It is designed to make sure that
// threads will receive predictable chunks, deterministically (so that next nest
// of loops with similar characteristics will get exactly same chunks on same
// threads).
// Current contract: chunk_bounds_nest has only lb0 and ub0,
// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
// Returns TRUE and fills chunk_bounds_nest if this thread has a chunk to
// execute, FALSE otherwise; *plastiter is set to TRUE only for the thread
// that receives the last chunk.
extern "C" kmp_int32
__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
                          /*in/out*/ bounds_info_t *original_bounds_nest,
                          /*out*/ bounds_info_t *chunk_bounds_nest,
                          kmp_index_t n, /*out*/ kmp_int32 *plastiter) {

  KMP_DEBUG_ASSERT(plastiter && original_bounds_nest);
  KE_TRACE(10, ("__kmpc_for_collapsed_init called (%d)\n", gtid));

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
  }

  // Bring the nest to canonical form (<= / >= only) and compute trip counts:
  kmp_canonicalize_loop_nest(loc, /*in/out*/ original_bounds_nest, n);

  // Working copy of the bounds that will be expanded to a parallelepiped:
  bounds_info_internal_t *updated_bounds_nest =
      (bounds_info_internal_t *)__kmp_allocate(sizeof(bounds_info_internal_t) *
                                               n);

  for (kmp_index_t i = 0; i < n; ++i) {
    updated_bounds_nest[i].b = original_bounds_nest[i];
  }

  // Total number of iterations in the expanded (rectangularized) space:
  kmp_loop_nest_iv_t total =
      kmp_process_loop_nest(/*in/out*/ updated_bounds_nest, n);

  if (plastiter != NULL) {
    *plastiter = FALSE;
  }

  if (total == 0) {
    // Loop won't execute:
    __kmp_free(updated_bounds_nest);
    return FALSE;
  }

  // OMPTODO: DISTRIBUTE is not supported yet
  __kmp_assert_valid_gtid(gtid);
  kmp_uint32 tid = __kmp_tid_from_gtid(gtid);

  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 nth = team->t.t_nproc; // Number of threads

  KMP_DEBUG_ASSERT(tid < nth);

  // Scratch points in the original IV space (one kmp_uint64 slot per level):
  kmp_point_t original_ivs_start =
      (kmp_point_t)__kmp_allocate(sizeof(kmp_uint64) * n);
  kmp_point_t original_ivs_end =
      (kmp_point_t)__kmp_allocate(sizeof(kmp_uint64) * n);
  kmp_point_t original_ivs_next_start =
      (kmp_point_t)__kmp_allocate(sizeof(kmp_uint64) * n);

  if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n,
                                       /*out*/ original_ivs_start)) {
    // Loop won't execute:
    __kmp_free(updated_bounds_nest);
    __kmp_free(original_ivs_start);
    __kmp_free(original_ivs_end);
    __kmp_free(original_ivs_next_start);
    return FALSE;
  }

  // Not doing this optimization for one thread:
  // (1) more to test
  // (2) without it current contract that chunk_bounds_nest has only lb0 and
  // ub0, lb1 and ub1 are set to 0 and can be ignored.
  // if (nth == 1) {
  //   // One thread:
  //   // Copy all info from original_bounds_nest, it'll be good enough.

  //   for (kmp_index_t i = 0; i < n; ++i) {
  //     chunk_bounds_nest[i] = original_bounds_nest[i];
  //   }

  //   if (plastiter != NULL) {
  //     *plastiter = TRUE;
  //   }
  //   __kmp_free(updated_bounds_nest);
  //   __kmp_free(original_ivs_start);
  //   __kmp_free(original_ivs_end);
  //   __kmp_free(original_ivs_next_start);
  //   return TRUE;
  // }

  kmp_loop_nest_iv_t new_iv = kmp_calc_new_iv_from_original_ivs(
      updated_bounds_nest, original_ivs_start, n);

  bool last_iter = false;

  // Peel off one chunk per remaining thread (thread 0 of the remainder first)
  // until we reach this thread's chunk (tid counts down to 0) or run out of
  // iterations.
  for (; nth > 0;) {
    // We could calculate chunk size once, but this is to compensate that the
    // original space is not parallelepiped and some threads can be left
    // without work:
    KMP_DEBUG_ASSERT(total >= new_iv);

    kmp_loop_nest_iv_t total_left = total - new_iv;
    kmp_loop_nest_iv_t chunk_size = total_left / nth;
    kmp_loop_nest_iv_t remainder = total_left % nth;

    kmp_loop_nest_iv_t curr_chunk_size = chunk_size;

    // Distribute the remainder one extra iteration at a time:
    if (remainder > 0) {
      ++curr_chunk_size;
      --remainder;
    }

#if defined(KMP_DEBUG)
    kmp_loop_nest_iv_t new_iv_for_start = new_iv;
#endif

    // Advance new_iv to the last iteration of this chunk:
    if (curr_chunk_size > 1) {
      new_iv += curr_chunk_size - 1;
    }

    if ((nth == 1) || (new_iv >= total - 1)) {
      // Do this one till the end - just in case we miscalculated
      // and either too much is left to process or new_iv is a bit too big:
      kmp_calc_original_ivs_for_end(original_bounds_nest, n,
                                    /*out*/ original_ivs_end);

      last_iter = true;
    } else {
      // Note: here we make sure it's past (or equal to) the previous point.
      if (!kmp_calc_original_ivs_for_chunk_end(original_bounds_nest, n,
                                               updated_bounds_nest,
                                               original_ivs_start, new_iv,
                                               /*out*/ original_ivs_end)) {
        // We could not find the ending point, use the original upper bounds:
        kmp_calc_original_ivs_for_end(original_bounds_nest, n,
                                      /*out*/ original_ivs_end);

        last_iter = true;
      }
    }

#if defined(KMP_DEBUG)
    auto new_iv_for_end = kmp_calc_new_iv_from_original_ivs(
        updated_bounds_nest, original_ivs_end, n);
    KMP_DEBUG_ASSERT(new_iv_for_end >= new_iv_for_start);
#endif

    if (last_iter && (tid != 0)) {
      // We are done, this was last chunk, but no chunk for current thread was
      // found:
      __kmp_free(updated_bounds_nest);
      __kmp_free(original_ivs_start);
      __kmp_free(original_ivs_end);
      __kmp_free(original_ivs_next_start);
      return FALSE;
    }

    if (tid == 0) {
      // We found the chunk for this thread, now we need to check if it's the
      // last chunk or not:

      if (last_iter ||
          !kmp_calc_next_original_ivs(original_bounds_nest, n, original_ivs_end,
                                      /*out*/ original_ivs_next_start)) {
        // no more loop iterations left to process,
        // this means that currently found chunk is the last chunk:
        if (plastiter != NULL) {
          *plastiter = TRUE;
        }
      }

      // Fill in chunk bounds:
      for (kmp_index_t i = 0; i < n; ++i) {
        chunk_bounds_nest[i] =
            original_bounds_nest[i]; // To fill in types, etc. - optional
        chunk_bounds_nest[i].lb0_u64 = original_ivs_start[i];
        chunk_bounds_nest[i].lb1_u64 = 0;

        chunk_bounds_nest[i].ub0_u64 = original_ivs_end[i];
        chunk_bounds_nest[i].ub1_u64 = 0;
      }

      __kmp_free(updated_bounds_nest);
      __kmp_free(original_ivs_start);
      __kmp_free(original_ivs_end);
      __kmp_free(original_ivs_next_start);
      return TRUE;
    }

    // This chunk belonged to another thread; move on to the next one:
    --tid;
    --nth;

    bool next_chunk = kmp_calc_next_original_ivs(
        original_bounds_nest, n, original_ivs_end, /*out*/ original_ivs_start);
    if (!next_chunk) {
      // no more loop iterations to process,
      // the previous chunk was the last chunk
      break;
    }

    // original_ivs_start is next to previous chunk original_ivs_end,
    // we need to start new chunk here, so chunks will be one after another
    // without any gap or overlap:
    new_iv = kmp_calc_new_iv_from_original_ivs(updated_bounds_nest,
                                               original_ivs_start, n);
  }

  // Ran out of chunks before reaching this thread:
  __kmp_free(updated_bounds_nest);
  __kmp_free(original_ivs_start);
  __kmp_free(original_ivs_end);
  __kmp_free(original_ivs_next_start);
  return FALSE;
}