xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_collapse.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
106c3fb27SDimitry Andric /*
206c3fb27SDimitry Andric  * kmp_collapse.cpp -- loop collapse feature
306c3fb27SDimitry Andric  */
406c3fb27SDimitry Andric 
506c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
806c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
906c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1006c3fb27SDimitry Andric //
1106c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1206c3fb27SDimitry Andric 
1306c3fb27SDimitry Andric #include "kmp.h"
1406c3fb27SDimitry Andric #include "kmp_error.h"
1506c3fb27SDimitry Andric #include "kmp_i18n.h"
1606c3fb27SDimitry Andric #include "kmp_itt.h"
1706c3fb27SDimitry Andric #include "kmp_stats.h"
1806c3fb27SDimitry Andric #include "kmp_str.h"
1906c3fb27SDimitry Andric #include "kmp_collapse.h"
2006c3fb27SDimitry Andric 
2106c3fb27SDimitry Andric #if OMPT_SUPPORT
2206c3fb27SDimitry Andric #include "ompt-specific.h"
2306c3fb27SDimitry Andric #endif
2406c3fb27SDimitry Andric 
2506c3fb27SDimitry Andric // OMPTODO: different style of comments (see kmp_sched)
2606c3fb27SDimitry Andric // OMPTODO: OMPT/OMPD
2706c3fb27SDimitry Andric 
2806c3fb27SDimitry Andric // avoid inadevertently using a library based abs
2906c3fb27SDimitry Andric template <typename T> T __kmp_abs(const T val) {
3006c3fb27SDimitry Andric   return (val < 0) ? -val : val;
3106c3fb27SDimitry Andric }
3206c3fb27SDimitry Andric kmp_uint32 __kmp_abs(const kmp_uint32 val) { return val; }
3306c3fb27SDimitry Andric kmp_uint64 __kmp_abs(const kmp_uint64 val) { return val; }
3406c3fb27SDimitry Andric 
3506c3fb27SDimitry Andric //----------------------------------------------------------------------------
3606c3fb27SDimitry Andric // Common functions for working with rectangular and non-rectangular loops
3706c3fb27SDimitry Andric //----------------------------------------------------------------------------
3806c3fb27SDimitry Andric 
39*5f757f3fSDimitry Andric template <typename T> int __kmp_sign(T val) {
40*5f757f3fSDimitry Andric   return (T(0) < val) - (val < T(0));
41*5f757f3fSDimitry Andric }
42*5f757f3fSDimitry Andric 
43*5f757f3fSDimitry Andric template <typename T> class CollapseAllocator {
44*5f757f3fSDimitry Andric   typedef T *pT;
45*5f757f3fSDimitry Andric 
46*5f757f3fSDimitry Andric private:
47*5f757f3fSDimitry Andric   static const size_t allocaSize = 32; // size limit for stack allocations
48*5f757f3fSDimitry Andric                                        // (8 bytes x 4 nested loops)
49*5f757f3fSDimitry Andric   char stackAlloc[allocaSize];
50*5f757f3fSDimitry Andric   static constexpr size_t maxElemCount = allocaSize / sizeof(T);
51*5f757f3fSDimitry Andric   pT pTAlloc;
52*5f757f3fSDimitry Andric 
53*5f757f3fSDimitry Andric public:
54*5f757f3fSDimitry Andric   CollapseAllocator(size_t n) : pTAlloc(reinterpret_cast<pT>(stackAlloc)) {
55*5f757f3fSDimitry Andric     if (n > maxElemCount) {
56*5f757f3fSDimitry Andric       pTAlloc = reinterpret_cast<pT>(__kmp_allocate(n * sizeof(T)));
57*5f757f3fSDimitry Andric     }
58*5f757f3fSDimitry Andric   }
59*5f757f3fSDimitry Andric   ~CollapseAllocator() {
60*5f757f3fSDimitry Andric     if (pTAlloc != reinterpret_cast<pT>(stackAlloc)) {
61*5f757f3fSDimitry Andric       __kmp_free(pTAlloc);
62*5f757f3fSDimitry Andric     }
63*5f757f3fSDimitry Andric   }
64*5f757f3fSDimitry Andric   T &operator[](int index) { return pTAlloc[index]; }
65*5f757f3fSDimitry Andric   operator const pT() { return pTAlloc; }
66*5f757f3fSDimitry Andric };
6706c3fb27SDimitry Andric 
6806c3fb27SDimitry Andric //----------Loop canonicalization---------------------------------------------
6906c3fb27SDimitry Andric 
7006c3fb27SDimitry Andric // For loop nest (any shape):
7106c3fb27SDimitry Andric // convert != to < or >;
7206c3fb27SDimitry Andric // switch from using < or > to <= or >=.
7306c3fb27SDimitry Andric // "bounds" array has to be allocated per thread.
7406c3fb27SDimitry Andric // All other internal functions will work only with canonicalized loops.
7506c3fb27SDimitry Andric template <typename T>
7606c3fb27SDimitry Andric void kmp_canonicalize_one_loop_XX(
7706c3fb27SDimitry Andric     ident_t *loc,
7806c3fb27SDimitry Andric     /*in/out*/ bounds_infoXX_template<T> *bounds) {
7906c3fb27SDimitry Andric 
8006c3fb27SDimitry Andric   if (__kmp_env_consistency_check) {
8106c3fb27SDimitry Andric     if (bounds->step == 0) {
8206c3fb27SDimitry Andric       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
8306c3fb27SDimitry Andric                             loc);
8406c3fb27SDimitry Andric     }
8506c3fb27SDimitry Andric   }
8606c3fb27SDimitry Andric 
8706c3fb27SDimitry Andric   if (bounds->comparison == comparison_t::comp_not_eq) {
8806c3fb27SDimitry Andric     // We can convert this to < or >, depends on the sign of the step:
8906c3fb27SDimitry Andric     if (bounds->step > 0) {
9006c3fb27SDimitry Andric       bounds->comparison = comparison_t::comp_less;
9106c3fb27SDimitry Andric     } else {
9206c3fb27SDimitry Andric       bounds->comparison = comparison_t::comp_greater;
9306c3fb27SDimitry Andric     }
9406c3fb27SDimitry Andric   }
9506c3fb27SDimitry Andric 
9606c3fb27SDimitry Andric   if (bounds->comparison == comparison_t::comp_less) {
9706c3fb27SDimitry Andric     // Note: ub0 can be unsigned. Should be Ok to hit overflow here,
9806c3fb27SDimitry Andric     // because ub0 + ub1*j should be still positive (otherwise loop was not
9906c3fb27SDimitry Andric     // well formed)
10006c3fb27SDimitry Andric     bounds->ub0 -= 1;
10106c3fb27SDimitry Andric     bounds->comparison = comparison_t::comp_less_or_eq;
10206c3fb27SDimitry Andric   } else if (bounds->comparison == comparison_t::comp_greater) {
10306c3fb27SDimitry Andric     bounds->ub0 += 1;
10406c3fb27SDimitry Andric     bounds->comparison = comparison_t::comp_greater_or_eq;
10506c3fb27SDimitry Andric   }
10606c3fb27SDimitry Andric }
10706c3fb27SDimitry Andric 
10806c3fb27SDimitry Andric // Canonicalize loop nest. original_bounds_nest is an array of length n.
10906c3fb27SDimitry Andric void kmp_canonicalize_loop_nest(ident_t *loc,
11006c3fb27SDimitry Andric                                 /*in/out*/ bounds_info_t *original_bounds_nest,
11106c3fb27SDimitry Andric                                 kmp_index_t n) {
11206c3fb27SDimitry Andric 
11306c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
11406c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
11506c3fb27SDimitry Andric 
11606c3fb27SDimitry Andric     switch (bounds->loop_type) {
11706c3fb27SDimitry Andric     case loop_type_t::loop_type_int32:
11806c3fb27SDimitry Andric       kmp_canonicalize_one_loop_XX<kmp_int32>(
11906c3fb27SDimitry Andric           loc,
12006c3fb27SDimitry Andric           /*in/out*/ (bounds_infoXX_template<kmp_int32> *)(bounds));
12106c3fb27SDimitry Andric       break;
12206c3fb27SDimitry Andric     case loop_type_t::loop_type_uint32:
12306c3fb27SDimitry Andric       kmp_canonicalize_one_loop_XX<kmp_uint32>(
12406c3fb27SDimitry Andric           loc,
12506c3fb27SDimitry Andric           /*in/out*/ (bounds_infoXX_template<kmp_uint32> *)(bounds));
12606c3fb27SDimitry Andric       break;
12706c3fb27SDimitry Andric     case loop_type_t::loop_type_int64:
12806c3fb27SDimitry Andric       kmp_canonicalize_one_loop_XX<kmp_int64>(
12906c3fb27SDimitry Andric           loc,
13006c3fb27SDimitry Andric           /*in/out*/ (bounds_infoXX_template<kmp_int64> *)(bounds));
13106c3fb27SDimitry Andric       break;
13206c3fb27SDimitry Andric     case loop_type_t::loop_type_uint64:
13306c3fb27SDimitry Andric       kmp_canonicalize_one_loop_XX<kmp_uint64>(
13406c3fb27SDimitry Andric           loc,
13506c3fb27SDimitry Andric           /*in/out*/ (bounds_infoXX_template<kmp_uint64> *)(bounds));
13606c3fb27SDimitry Andric       break;
13706c3fb27SDimitry Andric     default:
13806c3fb27SDimitry Andric       KMP_ASSERT(false);
13906c3fb27SDimitry Andric     }
14006c3fb27SDimitry Andric   }
14106c3fb27SDimitry Andric }
14206c3fb27SDimitry Andric 
14306c3fb27SDimitry Andric //----------Calculating trip count on one level-------------------------------
14406c3fb27SDimitry Andric 
14506c3fb27SDimitry Andric // Calculate trip count on this loop level.
14606c3fb27SDimitry Andric // We do this either for a rectangular loop nest,
14706c3fb27SDimitry Andric // or after an adjustment bringing the loops to a parallelepiped shape.
14806c3fb27SDimitry Andric // This number should not depend on the value of outer IV
14906c3fb27SDimitry Andric // even if the formular has lb1 and ub1.
15006c3fb27SDimitry Andric // Note: for non-rectangular loops don't use span for this, it's too big.
15106c3fb27SDimitry Andric 
15206c3fb27SDimitry Andric template <typename T>
15306c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_calculate_trip_count_XX(
15406c3fb27SDimitry Andric     /*in/out*/ bounds_infoXX_template<T> *bounds) {
15506c3fb27SDimitry Andric 
15606c3fb27SDimitry Andric   if (bounds->comparison == comparison_t::comp_less_or_eq) {
15706c3fb27SDimitry Andric     if (bounds->ub0 < bounds->lb0) {
15806c3fb27SDimitry Andric       // Note: after this we don't need to calculate inner loops,
15906c3fb27SDimitry Andric       // but that should be an edge case:
16006c3fb27SDimitry Andric       bounds->trip_count = 0;
16106c3fb27SDimitry Andric     } else {
16206c3fb27SDimitry Andric       // ub - lb may exceed signed type range; we need to cast to
16306c3fb27SDimitry Andric       // kmp_loop_nest_iv_t anyway
16406c3fb27SDimitry Andric       bounds->trip_count =
16506c3fb27SDimitry Andric           static_cast<kmp_loop_nest_iv_t>(bounds->ub0 - bounds->lb0) /
16606c3fb27SDimitry Andric               __kmp_abs(bounds->step) +
16706c3fb27SDimitry Andric           1;
16806c3fb27SDimitry Andric     }
16906c3fb27SDimitry Andric   } else if (bounds->comparison == comparison_t::comp_greater_or_eq) {
17006c3fb27SDimitry Andric     if (bounds->lb0 < bounds->ub0) {
17106c3fb27SDimitry Andric       // Note: after this we don't need to calculate inner loops,
17206c3fb27SDimitry Andric       // but that should be an edge case:
17306c3fb27SDimitry Andric       bounds->trip_count = 0;
17406c3fb27SDimitry Andric     } else {
17506c3fb27SDimitry Andric       // lb - ub may exceed signed type range; we need to cast to
17606c3fb27SDimitry Andric       // kmp_loop_nest_iv_t anyway
17706c3fb27SDimitry Andric       bounds->trip_count =
17806c3fb27SDimitry Andric           static_cast<kmp_loop_nest_iv_t>(bounds->lb0 - bounds->ub0) /
17906c3fb27SDimitry Andric               __kmp_abs(bounds->step) +
18006c3fb27SDimitry Andric           1;
18106c3fb27SDimitry Andric     }
18206c3fb27SDimitry Andric   } else {
18306c3fb27SDimitry Andric     KMP_ASSERT(false);
18406c3fb27SDimitry Andric   }
18506c3fb27SDimitry Andric   return bounds->trip_count;
18606c3fb27SDimitry Andric }
18706c3fb27SDimitry Andric 
18806c3fb27SDimitry Andric // Calculate trip count on this loop level.
18906c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_calculate_trip_count(/*in/out*/ bounds_info_t *bounds) {
19006c3fb27SDimitry Andric 
19106c3fb27SDimitry Andric   kmp_loop_nest_iv_t trip_count = 0;
19206c3fb27SDimitry Andric 
19306c3fb27SDimitry Andric   switch (bounds->loop_type) {
19406c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
19506c3fb27SDimitry Andric     trip_count = kmp_calculate_trip_count_XX<kmp_int32>(
19606c3fb27SDimitry Andric         /*in/out*/ (bounds_infoXX_template<kmp_int32> *)(bounds));
19706c3fb27SDimitry Andric     break;
19806c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
19906c3fb27SDimitry Andric     trip_count = kmp_calculate_trip_count_XX<kmp_uint32>(
20006c3fb27SDimitry Andric         /*in/out*/ (bounds_infoXX_template<kmp_uint32> *)(bounds));
20106c3fb27SDimitry Andric     break;
20206c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
20306c3fb27SDimitry Andric     trip_count = kmp_calculate_trip_count_XX<kmp_int64>(
20406c3fb27SDimitry Andric         /*in/out*/ (bounds_infoXX_template<kmp_int64> *)(bounds));
20506c3fb27SDimitry Andric     break;
20606c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
20706c3fb27SDimitry Andric     trip_count = kmp_calculate_trip_count_XX<kmp_uint64>(
20806c3fb27SDimitry Andric         /*in/out*/ (bounds_infoXX_template<kmp_uint64> *)(bounds));
20906c3fb27SDimitry Andric     break;
21006c3fb27SDimitry Andric   default:
21106c3fb27SDimitry Andric     KMP_ASSERT(false);
21206c3fb27SDimitry Andric   }
21306c3fb27SDimitry Andric 
21406c3fb27SDimitry Andric   return trip_count;
21506c3fb27SDimitry Andric }
21606c3fb27SDimitry Andric 
21706c3fb27SDimitry Andric //----------Trim original iv according to its type----------------------------
21806c3fb27SDimitry Andric 
21906c3fb27SDimitry Andric // Trim original iv according to its type.
22006c3fb27SDimitry Andric // Return kmp_uint64 value which can be easily used in all internal calculations
22106c3fb27SDimitry Andric // And can be statically cast back to original type in user code.
22206c3fb27SDimitry Andric kmp_uint64 kmp_fix_iv(loop_type_t loop_iv_type, kmp_uint64 original_iv) {
22306c3fb27SDimitry Andric   kmp_uint64 res = 0;
22406c3fb27SDimitry Andric 
22506c3fb27SDimitry Andric   switch (loop_iv_type) {
22606c3fb27SDimitry Andric   case loop_type_t::loop_type_int8:
22706c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_int8>(original_iv));
22806c3fb27SDimitry Andric     break;
22906c3fb27SDimitry Andric   case loop_type_t::loop_type_uint8:
23006c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_uint8>(original_iv));
23106c3fb27SDimitry Andric     break;
23206c3fb27SDimitry Andric   case loop_type_t::loop_type_int16:
23306c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_int16>(original_iv));
23406c3fb27SDimitry Andric     break;
23506c3fb27SDimitry Andric   case loop_type_t::loop_type_uint16:
23606c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_uint16>(original_iv));
23706c3fb27SDimitry Andric     break;
23806c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
23906c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_int32>(original_iv));
24006c3fb27SDimitry Andric     break;
24106c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
24206c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_uint32>(original_iv));
24306c3fb27SDimitry Andric     break;
24406c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
24506c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(static_cast<kmp_int64>(original_iv));
24606c3fb27SDimitry Andric     break;
24706c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
24806c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(original_iv);
24906c3fb27SDimitry Andric     break;
25006c3fb27SDimitry Andric   default:
25106c3fb27SDimitry Andric     KMP_ASSERT(false);
25206c3fb27SDimitry Andric   }
25306c3fb27SDimitry Andric 
25406c3fb27SDimitry Andric   return res;
25506c3fb27SDimitry Andric }
25606c3fb27SDimitry Andric 
25706c3fb27SDimitry Andric //----------Compare two IVs (remember they have a type)-----------------------
25806c3fb27SDimitry Andric 
25906c3fb27SDimitry Andric bool kmp_ivs_eq(loop_type_t loop_iv_type, kmp_uint64 original_iv1,
26006c3fb27SDimitry Andric                 kmp_uint64 original_iv2) {
26106c3fb27SDimitry Andric   bool res = false;
26206c3fb27SDimitry Andric 
26306c3fb27SDimitry Andric   switch (loop_iv_type) {
26406c3fb27SDimitry Andric   case loop_type_t::loop_type_int8:
26506c3fb27SDimitry Andric     res = static_cast<kmp_int8>(original_iv1) ==
26606c3fb27SDimitry Andric           static_cast<kmp_int8>(original_iv2);
26706c3fb27SDimitry Andric     break;
26806c3fb27SDimitry Andric   case loop_type_t::loop_type_uint8:
26906c3fb27SDimitry Andric     res = static_cast<kmp_uint8>(original_iv1) ==
27006c3fb27SDimitry Andric           static_cast<kmp_uint8>(original_iv2);
27106c3fb27SDimitry Andric     break;
27206c3fb27SDimitry Andric   case loop_type_t::loop_type_int16:
27306c3fb27SDimitry Andric     res = static_cast<kmp_int16>(original_iv1) ==
27406c3fb27SDimitry Andric           static_cast<kmp_int16>(original_iv2);
27506c3fb27SDimitry Andric     break;
27606c3fb27SDimitry Andric   case loop_type_t::loop_type_uint16:
27706c3fb27SDimitry Andric     res = static_cast<kmp_uint16>(original_iv1) ==
27806c3fb27SDimitry Andric           static_cast<kmp_uint16>(original_iv2);
27906c3fb27SDimitry Andric     break;
28006c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
28106c3fb27SDimitry Andric     res = static_cast<kmp_int32>(original_iv1) ==
28206c3fb27SDimitry Andric           static_cast<kmp_int32>(original_iv2);
28306c3fb27SDimitry Andric     break;
28406c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
28506c3fb27SDimitry Andric     res = static_cast<kmp_uint32>(original_iv1) ==
28606c3fb27SDimitry Andric           static_cast<kmp_uint32>(original_iv2);
28706c3fb27SDimitry Andric     break;
28806c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
28906c3fb27SDimitry Andric     res = static_cast<kmp_int64>(original_iv1) ==
29006c3fb27SDimitry Andric           static_cast<kmp_int64>(original_iv2);
29106c3fb27SDimitry Andric     break;
29206c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
29306c3fb27SDimitry Andric     res = static_cast<kmp_uint64>(original_iv1) ==
29406c3fb27SDimitry Andric           static_cast<kmp_uint64>(original_iv2);
29506c3fb27SDimitry Andric     break;
29606c3fb27SDimitry Andric   default:
29706c3fb27SDimitry Andric     KMP_ASSERT(false);
29806c3fb27SDimitry Andric   }
29906c3fb27SDimitry Andric 
30006c3fb27SDimitry Andric   return res;
30106c3fb27SDimitry Andric }
30206c3fb27SDimitry Andric 
30306c3fb27SDimitry Andric //----------Calculate original iv on one level--------------------------------
30406c3fb27SDimitry Andric 
30506c3fb27SDimitry Andric // Return true if the point fits into upper bounds on this level,
30606c3fb27SDimitry Andric // false otherwise
30706c3fb27SDimitry Andric template <typename T>
30806c3fb27SDimitry Andric bool kmp_iv_is_in_upper_bound_XX(const bounds_infoXX_template<T> *bounds,
30906c3fb27SDimitry Andric                                  const kmp_point_t original_ivs,
31006c3fb27SDimitry Andric                                  kmp_index_t ind) {
31106c3fb27SDimitry Andric 
31206c3fb27SDimitry Andric   T iv = static_cast<T>(original_ivs[ind]);
31306c3fb27SDimitry Andric   T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);
31406c3fb27SDimitry Andric 
31506c3fb27SDimitry Andric   if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
31606c3fb27SDimitry Andric        (iv > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
31706c3fb27SDimitry Andric       ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
31806c3fb27SDimitry Andric        (iv < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
31906c3fb27SDimitry Andric     // The calculated point is outside of loop upper boundary:
32006c3fb27SDimitry Andric     return false;
32106c3fb27SDimitry Andric   }
32206c3fb27SDimitry Andric 
32306c3fb27SDimitry Andric   return true;
32406c3fb27SDimitry Andric }
32506c3fb27SDimitry Andric 
32606c3fb27SDimitry Andric // Calculate one iv corresponding to iteration on the level ind.
32706c3fb27SDimitry Andric // Return true if it fits into lower-upper bounds on this level
32806c3fb27SDimitry Andric // (if not, we need to re-calculate)
32906c3fb27SDimitry Andric template <typename T>
33006c3fb27SDimitry Andric bool kmp_calc_one_iv_XX(const bounds_infoXX_template<T> *bounds,
33106c3fb27SDimitry Andric                         /*in/out*/ kmp_point_t original_ivs,
33206c3fb27SDimitry Andric                         const kmp_iterations_t iterations, kmp_index_t ind,
33306c3fb27SDimitry Andric                         bool start_with_lower_bound, bool checkBounds) {
33406c3fb27SDimitry Andric 
33506c3fb27SDimitry Andric   kmp_uint64 temp = 0;
33606c3fb27SDimitry Andric   T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);
33706c3fb27SDimitry Andric 
33806c3fb27SDimitry Andric   if (start_with_lower_bound) {
33906c3fb27SDimitry Andric     // we moved to the next iteration on one of outer loops, should start
34006c3fb27SDimitry Andric     // with the lower bound here:
34106c3fb27SDimitry Andric     temp = bounds->lb0 + bounds->lb1 * outer_iv;
34206c3fb27SDimitry Andric   } else {
34306c3fb27SDimitry Andric     auto iteration = iterations[ind];
34406c3fb27SDimitry Andric     temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration * bounds->step;
34506c3fb27SDimitry Andric   }
34606c3fb27SDimitry Andric 
34706c3fb27SDimitry Andric   // Now trim original iv according to its type:
34806c3fb27SDimitry Andric   original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
34906c3fb27SDimitry Andric 
35006c3fb27SDimitry Andric   if (checkBounds) {
35106c3fb27SDimitry Andric     return kmp_iv_is_in_upper_bound_XX(bounds, original_ivs, ind);
35206c3fb27SDimitry Andric   } else {
35306c3fb27SDimitry Andric     return true;
35406c3fb27SDimitry Andric   }
35506c3fb27SDimitry Andric }
35606c3fb27SDimitry Andric 
35706c3fb27SDimitry Andric bool kmp_calc_one_iv(const bounds_info_t *bounds,
35806c3fb27SDimitry Andric                      /*in/out*/ kmp_point_t original_ivs,
35906c3fb27SDimitry Andric                      const kmp_iterations_t iterations, kmp_index_t ind,
36006c3fb27SDimitry Andric                      bool start_with_lower_bound, bool checkBounds) {
36106c3fb27SDimitry Andric 
36206c3fb27SDimitry Andric   switch (bounds->loop_type) {
36306c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
36406c3fb27SDimitry Andric     return kmp_calc_one_iv_XX<kmp_int32>(
36506c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int32> *)(bounds),
36606c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
36706c3fb27SDimitry Andric         checkBounds);
36806c3fb27SDimitry Andric     break;
36906c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
37006c3fb27SDimitry Andric     return kmp_calc_one_iv_XX<kmp_uint32>(
37106c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint32> *)(bounds),
37206c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
37306c3fb27SDimitry Andric         checkBounds);
37406c3fb27SDimitry Andric     break;
37506c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
37606c3fb27SDimitry Andric     return kmp_calc_one_iv_XX<kmp_int64>(
37706c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int64> *)(bounds),
37806c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
37906c3fb27SDimitry Andric         checkBounds);
38006c3fb27SDimitry Andric     break;
38106c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
38206c3fb27SDimitry Andric     return kmp_calc_one_iv_XX<kmp_uint64>(
38306c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint64> *)(bounds),
38406c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
38506c3fb27SDimitry Andric         checkBounds);
38606c3fb27SDimitry Andric     break;
38706c3fb27SDimitry Andric   default:
38806c3fb27SDimitry Andric     KMP_ASSERT(false);
38906c3fb27SDimitry Andric     return false;
39006c3fb27SDimitry Andric   }
39106c3fb27SDimitry Andric }
39206c3fb27SDimitry Andric 
39306c3fb27SDimitry Andric //----------Calculate original iv on one level for rectangular loop nest------
39406c3fb27SDimitry Andric 
39506c3fb27SDimitry Andric // Calculate one iv corresponding to iteration on the level ind.
39606c3fb27SDimitry Andric // Return true if it fits into lower-upper bounds on this level
39706c3fb27SDimitry Andric // (if not, we need to re-calculate)
39806c3fb27SDimitry Andric template <typename T>
39906c3fb27SDimitry Andric void kmp_calc_one_iv_rectang_XX(const bounds_infoXX_template<T> *bounds,
40006c3fb27SDimitry Andric                                 /*in/out*/ kmp_uint64 *original_ivs,
40106c3fb27SDimitry Andric                                 const kmp_iterations_t iterations,
40206c3fb27SDimitry Andric                                 kmp_index_t ind) {
40306c3fb27SDimitry Andric 
40406c3fb27SDimitry Andric   auto iteration = iterations[ind];
40506c3fb27SDimitry Andric 
40606c3fb27SDimitry Andric   kmp_uint64 temp =
40706c3fb27SDimitry Andric       bounds->lb0 +
40806c3fb27SDimitry Andric       bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv]) +
40906c3fb27SDimitry Andric       iteration * bounds->step;
41006c3fb27SDimitry Andric 
41106c3fb27SDimitry Andric   // Now trim original iv according to its type:
41206c3fb27SDimitry Andric   original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
41306c3fb27SDimitry Andric }
41406c3fb27SDimitry Andric 
41506c3fb27SDimitry Andric void kmp_calc_one_iv_rectang(const bounds_info_t *bounds,
41606c3fb27SDimitry Andric                              /*in/out*/ kmp_uint64 *original_ivs,
41706c3fb27SDimitry Andric                              const kmp_iterations_t iterations,
41806c3fb27SDimitry Andric                              kmp_index_t ind) {
41906c3fb27SDimitry Andric 
42006c3fb27SDimitry Andric   switch (bounds->loop_type) {
42106c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
42206c3fb27SDimitry Andric     kmp_calc_one_iv_rectang_XX<kmp_int32>(
42306c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int32> *)(bounds),
42406c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind);
42506c3fb27SDimitry Andric     break;
42606c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
42706c3fb27SDimitry Andric     kmp_calc_one_iv_rectang_XX<kmp_uint32>(
42806c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint32> *)(bounds),
42906c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind);
43006c3fb27SDimitry Andric     break;
43106c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
43206c3fb27SDimitry Andric     kmp_calc_one_iv_rectang_XX<kmp_int64>(
43306c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int64> *)(bounds),
43406c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind);
43506c3fb27SDimitry Andric     break;
43606c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
43706c3fb27SDimitry Andric     kmp_calc_one_iv_rectang_XX<kmp_uint64>(
43806c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint64> *)(bounds),
43906c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind);
44006c3fb27SDimitry Andric     break;
44106c3fb27SDimitry Andric   default:
44206c3fb27SDimitry Andric     KMP_ASSERT(false);
44306c3fb27SDimitry Andric   }
44406c3fb27SDimitry Andric }
44506c3fb27SDimitry Andric 
44606c3fb27SDimitry Andric //----------------------------------------------------------------------------
44706c3fb27SDimitry Andric // Rectangular loop nest
44806c3fb27SDimitry Andric //----------------------------------------------------------------------------
44906c3fb27SDimitry Andric 
45006c3fb27SDimitry Andric //----------Canonicalize loop nest and calculate trip count-------------------
45106c3fb27SDimitry Andric 
45206c3fb27SDimitry Andric // Canonicalize loop nest and calculate overall trip count.
45306c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread.
45406c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form
45506c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a
45606c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array
45706c3fb27SDimitry Andric // (only trip counts will be calculated).
45806c3fb27SDimitry Andric // Returns trip count of overall space.
45906c3fb27SDimitry Andric extern "C" kmp_loop_nest_iv_t
46006c3fb27SDimitry Andric __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
46106c3fb27SDimitry Andric                                  /*in/out*/ bounds_info_t *original_bounds_nest,
46206c3fb27SDimitry Andric                                  kmp_index_t n) {
46306c3fb27SDimitry Andric 
46406c3fb27SDimitry Andric   kmp_canonicalize_loop_nest(loc, /*in/out*/ original_bounds_nest, n);
46506c3fb27SDimitry Andric 
46606c3fb27SDimitry Andric   kmp_loop_nest_iv_t total = 1;
46706c3fb27SDimitry Andric 
46806c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
46906c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
47006c3fb27SDimitry Andric 
47106c3fb27SDimitry Andric     kmp_loop_nest_iv_t trip_count = kmp_calculate_trip_count(/*in/out*/ bounds);
47206c3fb27SDimitry Andric     total *= trip_count;
47306c3fb27SDimitry Andric   }
47406c3fb27SDimitry Andric 
47506c3fb27SDimitry Andric   return total;
47606c3fb27SDimitry Andric }
47706c3fb27SDimitry Andric 
47806c3fb27SDimitry Andric //----------Calculate old induction variables---------------------------------
47906c3fb27SDimitry Andric 
48006c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv.
48106c3fb27SDimitry Andric // Note: original IV will be returned as if it had kmp_uint64 type,
48206c3fb27SDimitry Andric // will have to be converted to original type in user code.
48306c3fb27SDimitry Andric // Note: trip counts should be already calculated by
48406c3fb27SDimitry Andric // __kmpc_process_loop_nest_rectang.
48506c3fb27SDimitry Andric // OMPTODO: special case 2, 3 nested loops: either do different
48606c3fb27SDimitry Andric // interface without array or possibly template this over n
48706c3fb27SDimitry Andric extern "C" void
48806c3fb27SDimitry Andric __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
48906c3fb27SDimitry Andric                                  const bounds_info_t *original_bounds_nest,
49006c3fb27SDimitry Andric                                  /*out*/ kmp_uint64 *original_ivs,
49106c3fb27SDimitry Andric                                  kmp_index_t n) {
49206c3fb27SDimitry Andric 
493*5f757f3fSDimitry Andric   CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
49406c3fb27SDimitry Andric 
49506c3fb27SDimitry Andric   // First, calc corresponding iteration in every original loop:
49606c3fb27SDimitry Andric   for (kmp_index_t ind = n; ind > 0;) {
49706c3fb27SDimitry Andric     --ind;
49806c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
49906c3fb27SDimitry Andric 
50006c3fb27SDimitry Andric     // should be optimized to OPDIVREM:
50106c3fb27SDimitry Andric     auto temp = new_iv / bounds->trip_count;
50206c3fb27SDimitry Andric     auto iteration = new_iv % bounds->trip_count;
50306c3fb27SDimitry Andric     new_iv = temp;
50406c3fb27SDimitry Andric 
50506c3fb27SDimitry Andric     iterations[ind] = iteration;
50606c3fb27SDimitry Andric   }
50706c3fb27SDimitry Andric   KMP_ASSERT(new_iv == 0);
50806c3fb27SDimitry Andric 
50906c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
51006c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
51106c3fb27SDimitry Andric 
51206c3fb27SDimitry Andric     kmp_calc_one_iv_rectang(bounds, /*in/out*/ original_ivs, iterations, ind);
51306c3fb27SDimitry Andric   }
51406c3fb27SDimitry Andric }
51506c3fb27SDimitry Andric 
51606c3fb27SDimitry Andric //----------------------------------------------------------------------------
51706c3fb27SDimitry Andric // Non-rectangular loop nest
51806c3fb27SDimitry Andric //----------------------------------------------------------------------------
51906c3fb27SDimitry Andric 
52006c3fb27SDimitry Andric //----------Calculate maximum possible span of iv values on one level---------
52106c3fb27SDimitry Andric 
52206c3fb27SDimitry Andric // Calculate span for IV on this loop level for "<=" case.
52306c3fb27SDimitry Andric // Note: it's for <= on this loop nest level, so lower bound should be smallest
52406c3fb27SDimitry Andric // value, upper bound should be the biggest value. If the loop won't execute,
52506c3fb27SDimitry Andric // 'smallest' may be bigger than 'biggest', but we'd better not switch them
52606c3fb27SDimitry Andric // around.
52706c3fb27SDimitry Andric template <typename T>
52806c3fb27SDimitry Andric void kmp_calc_span_lessoreq_XX(
52906c3fb27SDimitry Andric     /* in/out*/ bounds_info_internalXX_template<T> *bounds,
53006c3fb27SDimitry Andric     /* in/out*/ bounds_info_internal_t *bounds_nest) {
53106c3fb27SDimitry Andric 
53206c3fb27SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
53306c3fb27SDimitry Andric   // typedef typename traits_t<T>::signed_t ST;
53406c3fb27SDimitry Andric 
53506c3fb27SDimitry Andric   // typedef typename big_span_t span_t;
53606c3fb27SDimitry Andric   typedef T span_t;
53706c3fb27SDimitry Andric 
53806c3fb27SDimitry Andric   auto &bbounds = bounds->b;
53906c3fb27SDimitry Andric 
54006c3fb27SDimitry Andric   if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) {
54106c3fb27SDimitry Andric     // This dimention depends on one of previous ones; can't be the outermost
54206c3fb27SDimitry Andric     // one.
54306c3fb27SDimitry Andric     bounds_info_internalXX_template<T> *previous =
54406c3fb27SDimitry Andric         reinterpret_cast<bounds_info_internalXX_template<T> *>(
54506c3fb27SDimitry Andric             &(bounds_nest[bbounds.outer_iv]));
54606c3fb27SDimitry Andric 
54706c3fb27SDimitry Andric     // OMPTODO: assert that T is compatible with loop variable type on
54806c3fb27SDimitry Andric     // 'previous' loop
54906c3fb27SDimitry Andric 
55006c3fb27SDimitry Andric     {
55106c3fb27SDimitry Andric       span_t bound_candidate1 =
55206c3fb27SDimitry Andric           bbounds.lb0 + bbounds.lb1 * previous->span_smallest;
55306c3fb27SDimitry Andric       span_t bound_candidate2 =
55406c3fb27SDimitry Andric           bbounds.lb0 + bbounds.lb1 * previous->span_biggest;
55506c3fb27SDimitry Andric       if (bound_candidate1 < bound_candidate2) {
55606c3fb27SDimitry Andric         bounds->span_smallest = bound_candidate1;
55706c3fb27SDimitry Andric       } else {
55806c3fb27SDimitry Andric         bounds->span_smallest = bound_candidate2;
55906c3fb27SDimitry Andric       }
56006c3fb27SDimitry Andric     }
56106c3fb27SDimitry Andric 
56206c3fb27SDimitry Andric     {
56306c3fb27SDimitry Andric       // We can't adjust the upper bound with respect to step, because
56406c3fb27SDimitry Andric       // lower bound might be off after adjustments
56506c3fb27SDimitry Andric 
56606c3fb27SDimitry Andric       span_t bound_candidate1 =
56706c3fb27SDimitry Andric           bbounds.ub0 + bbounds.ub1 * previous->span_smallest;
56806c3fb27SDimitry Andric       span_t bound_candidate2 =
56906c3fb27SDimitry Andric           bbounds.ub0 + bbounds.ub1 * previous->span_biggest;
57006c3fb27SDimitry Andric       if (bound_candidate1 < bound_candidate2) {
57106c3fb27SDimitry Andric         bounds->span_biggest = bound_candidate2;
57206c3fb27SDimitry Andric       } else {
57306c3fb27SDimitry Andric         bounds->span_biggest = bound_candidate1;
57406c3fb27SDimitry Andric       }
57506c3fb27SDimitry Andric     }
57606c3fb27SDimitry Andric   } else {
57706c3fb27SDimitry Andric     // Rectangular:
57806c3fb27SDimitry Andric     bounds->span_smallest = bbounds.lb0;
57906c3fb27SDimitry Andric     bounds->span_biggest = bbounds.ub0;
58006c3fb27SDimitry Andric   }
58106c3fb27SDimitry Andric   if (!bounds->loop_bounds_adjusted) {
58206c3fb27SDimitry Andric     // Here it's safe to reduce the space to the multiply of step.
58306c3fb27SDimitry Andric     // OMPTODO: check if the formular is correct.
58406c3fb27SDimitry Andric     // Also check if it would be safe to do this if we didn't adjust left side.
58506c3fb27SDimitry Andric     bounds->span_biggest -=
58606c3fb27SDimitry Andric         (static_cast<UT>(bbounds.ub0 - bbounds.lb0)) % bbounds.step; // abs?
58706c3fb27SDimitry Andric   }
58806c3fb27SDimitry Andric }
58906c3fb27SDimitry Andric 
59006c3fb27SDimitry Andric // Calculate span for IV on this loop level for ">=" case.
59106c3fb27SDimitry Andric template <typename T>
59206c3fb27SDimitry Andric void kmp_calc_span_greateroreq_XX(
59306c3fb27SDimitry Andric     /* in/out*/ bounds_info_internalXX_template<T> *bounds,
59406c3fb27SDimitry Andric     /* in/out*/ bounds_info_internal_t *bounds_nest) {
59506c3fb27SDimitry Andric 
59606c3fb27SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
59706c3fb27SDimitry Andric   // typedef typename traits_t<T>::signed_t ST;
59806c3fb27SDimitry Andric 
59906c3fb27SDimitry Andric   // typedef typename big_span_t span_t;
60006c3fb27SDimitry Andric   typedef T span_t;
60106c3fb27SDimitry Andric 
60206c3fb27SDimitry Andric   auto &bbounds = bounds->b;
60306c3fb27SDimitry Andric 
60406c3fb27SDimitry Andric   if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) {
60506c3fb27SDimitry Andric     // This dimention depends on one of previous ones; can't be the outermost
60606c3fb27SDimitry Andric     // one.
60706c3fb27SDimitry Andric     bounds_info_internalXX_template<T> *previous =
60806c3fb27SDimitry Andric         reinterpret_cast<bounds_info_internalXX_template<T> *>(
60906c3fb27SDimitry Andric             &(bounds_nest[bbounds.outer_iv]));
61006c3fb27SDimitry Andric 
61106c3fb27SDimitry Andric     // OMPTODO: assert that T is compatible with loop variable type on
61206c3fb27SDimitry Andric     // 'previous' loop
61306c3fb27SDimitry Andric 
61406c3fb27SDimitry Andric     {
61506c3fb27SDimitry Andric       span_t bound_candidate1 =
61606c3fb27SDimitry Andric           bbounds.lb0 + bbounds.lb1 * previous->span_smallest;
61706c3fb27SDimitry Andric       span_t bound_candidate2 =
61806c3fb27SDimitry Andric           bbounds.lb0 + bbounds.lb1 * previous->span_biggest;
61906c3fb27SDimitry Andric       if (bound_candidate1 >= bound_candidate2) {
62006c3fb27SDimitry Andric         bounds->span_smallest = bound_candidate1;
62106c3fb27SDimitry Andric       } else {
62206c3fb27SDimitry Andric         bounds->span_smallest = bound_candidate2;
62306c3fb27SDimitry Andric       }
62406c3fb27SDimitry Andric     }
62506c3fb27SDimitry Andric 
62606c3fb27SDimitry Andric     {
62706c3fb27SDimitry Andric       // We can't adjust the upper bound with respect to step, because
62806c3fb27SDimitry Andric       // lower bound might be off after adjustments
62906c3fb27SDimitry Andric 
63006c3fb27SDimitry Andric       span_t bound_candidate1 =
63106c3fb27SDimitry Andric           bbounds.ub0 + bbounds.ub1 * previous->span_smallest;
63206c3fb27SDimitry Andric       span_t bound_candidate2 =
63306c3fb27SDimitry Andric           bbounds.ub0 + bbounds.ub1 * previous->span_biggest;
63406c3fb27SDimitry Andric       if (bound_candidate1 >= bound_candidate2) {
63506c3fb27SDimitry Andric         bounds->span_biggest = bound_candidate2;
63606c3fb27SDimitry Andric       } else {
63706c3fb27SDimitry Andric         bounds->span_biggest = bound_candidate1;
63806c3fb27SDimitry Andric       }
63906c3fb27SDimitry Andric     }
64006c3fb27SDimitry Andric 
64106c3fb27SDimitry Andric   } else {
64206c3fb27SDimitry Andric     // Rectangular:
64306c3fb27SDimitry Andric     bounds->span_biggest = bbounds.lb0;
64406c3fb27SDimitry Andric     bounds->span_smallest = bbounds.ub0;
64506c3fb27SDimitry Andric   }
64606c3fb27SDimitry Andric   if (!bounds->loop_bounds_adjusted) {
64706c3fb27SDimitry Andric     // Here it's safe to reduce the space to the multiply of step.
64806c3fb27SDimitry Andric     // OMPTODO: check if the formular is correct.
64906c3fb27SDimitry Andric     // Also check if it would be safe to do this if we didn't adjust left side.
65006c3fb27SDimitry Andric     bounds->span_biggest -=
65106c3fb27SDimitry Andric         (static_cast<UT>(bbounds.ub0 - bbounds.lb0)) % bbounds.step; // abs?
65206c3fb27SDimitry Andric   }
65306c3fb27SDimitry Andric }
65406c3fb27SDimitry Andric 
65506c3fb27SDimitry Andric // Calculate maximum possible span for IV on this loop level.
65606c3fb27SDimitry Andric template <typename T>
65706c3fb27SDimitry Andric void kmp_calc_span_XX(
65806c3fb27SDimitry Andric     /* in/out*/ bounds_info_internalXX_template<T> *bounds,
65906c3fb27SDimitry Andric     /* in/out*/ bounds_info_internal_t *bounds_nest) {
66006c3fb27SDimitry Andric 
66106c3fb27SDimitry Andric   if (bounds->b.comparison == comparison_t::comp_less_or_eq) {
66206c3fb27SDimitry Andric     kmp_calc_span_lessoreq_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest);
66306c3fb27SDimitry Andric   } else {
66406c3fb27SDimitry Andric     KMP_ASSERT(bounds->b.comparison == comparison_t::comp_greater_or_eq);
66506c3fb27SDimitry Andric     kmp_calc_span_greateroreq_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest);
66606c3fb27SDimitry Andric   }
66706c3fb27SDimitry Andric }
66806c3fb27SDimitry Andric 
66906c3fb27SDimitry Andric //----------All initial processing of the loop nest---------------------------
67006c3fb27SDimitry Andric 
67106c3fb27SDimitry Andric // Calculate new bounds for this loop level.
67206c3fb27SDimitry Andric // To be able to work with the nest we need to get it to a parallelepiped shape.
67306c3fb27SDimitry Andric // We need to stay in the original range of values, so that there will be no
67406c3fb27SDimitry Andric // overflow, for that we'll adjust both upper and lower bounds as needed.
67506c3fb27SDimitry Andric template <typename T>
67606c3fb27SDimitry Andric void kmp_calc_new_bounds_XX(
67706c3fb27SDimitry Andric     /* in/out*/ bounds_info_internalXX_template<T> *bounds,
67806c3fb27SDimitry Andric     /* in/out*/ bounds_info_internal_t *bounds_nest) {
67906c3fb27SDimitry Andric 
68006c3fb27SDimitry Andric   auto &bbounds = bounds->b;
68106c3fb27SDimitry Andric 
68206c3fb27SDimitry Andric   if (bbounds.lb1 == bbounds.ub1) {
68306c3fb27SDimitry Andric     // Already parallel, no need to adjust:
68406c3fb27SDimitry Andric     bounds->loop_bounds_adjusted = false;
68506c3fb27SDimitry Andric   } else {
68606c3fb27SDimitry Andric     bounds->loop_bounds_adjusted = true;
68706c3fb27SDimitry Andric 
68806c3fb27SDimitry Andric     T old_lb1 = bbounds.lb1;
68906c3fb27SDimitry Andric     T old_ub1 = bbounds.ub1;
69006c3fb27SDimitry Andric 
69106c3fb27SDimitry Andric     if (__kmp_sign(old_lb1) != __kmp_sign(old_ub1)) {
69206c3fb27SDimitry Andric       // With this shape we can adjust to a rectangle:
69306c3fb27SDimitry Andric       bbounds.lb1 = 0;
69406c3fb27SDimitry Andric       bbounds.ub1 = 0;
69506c3fb27SDimitry Andric     } else {
69606c3fb27SDimitry Andric       // get upper and lower bounds to be parallel
69706c3fb27SDimitry Andric       // with values in the old range.
69806c3fb27SDimitry Andric       // Note: abs didn't work here.
69906c3fb27SDimitry Andric       if (((old_lb1 < 0) && (old_lb1 < old_ub1)) ||
70006c3fb27SDimitry Andric           ((old_lb1 > 0) && (old_lb1 > old_ub1))) {
70106c3fb27SDimitry Andric         bbounds.lb1 = old_ub1;
70206c3fb27SDimitry Andric       } else {
70306c3fb27SDimitry Andric         bbounds.ub1 = old_lb1;
70406c3fb27SDimitry Andric       }
70506c3fb27SDimitry Andric     }
70606c3fb27SDimitry Andric 
70706c3fb27SDimitry Andric     // Now need to adjust lb0, ub0, otherwise in some cases space will shrink.
70806c3fb27SDimitry Andric     // The idea here that for this IV we are now getting the same span
70906c3fb27SDimitry Andric     // irrespective of the previous IV value.
71006c3fb27SDimitry Andric     bounds_info_internalXX_template<T> *previous =
71106c3fb27SDimitry Andric         reinterpret_cast<bounds_info_internalXX_template<T> *>(
71206c3fb27SDimitry Andric             &bounds_nest[bbounds.outer_iv]);
71306c3fb27SDimitry Andric 
71406c3fb27SDimitry Andric     if (bbounds.comparison == comparison_t::comp_less_or_eq) {
71506c3fb27SDimitry Andric       if (old_lb1 < bbounds.lb1) {
71606c3fb27SDimitry Andric         KMP_ASSERT(old_lb1 < 0);
71706c3fb27SDimitry Andric         // The length is good on outer_iv biggest number,
71806c3fb27SDimitry Andric         // can use it to find where to move the lower bound:
71906c3fb27SDimitry Andric 
72006c3fb27SDimitry Andric         T sub = (bbounds.lb1 - old_lb1) * previous->span_biggest;
72106c3fb27SDimitry Andric         bbounds.lb0 -= sub; // OMPTODO: what if it'll go out of unsigned space?
72206c3fb27SDimitry Andric                             // e.g. it was 0?? (same below)
72306c3fb27SDimitry Andric       } else if (old_lb1 > bbounds.lb1) {
72406c3fb27SDimitry Andric         // still need to move lower bound:
72506c3fb27SDimitry Andric         T add = (old_lb1 - bbounds.lb1) * previous->span_smallest;
72606c3fb27SDimitry Andric         bbounds.lb0 += add;
72706c3fb27SDimitry Andric       }
72806c3fb27SDimitry Andric 
72906c3fb27SDimitry Andric       if (old_ub1 > bbounds.ub1) {
73006c3fb27SDimitry Andric         KMP_ASSERT(old_ub1 > 0);
73106c3fb27SDimitry Andric         // The length is good on outer_iv biggest number,
73206c3fb27SDimitry Andric         // can use it to find where to move upper bound:
73306c3fb27SDimitry Andric 
73406c3fb27SDimitry Andric         T add = (old_ub1 - bbounds.ub1) * previous->span_biggest;
73506c3fb27SDimitry Andric         bbounds.ub0 += add;
73606c3fb27SDimitry Andric       } else if (old_ub1 < bbounds.ub1) {
73706c3fb27SDimitry Andric         // still need to move upper bound:
73806c3fb27SDimitry Andric         T sub = (bbounds.ub1 - old_ub1) * previous->span_smallest;
73906c3fb27SDimitry Andric         bbounds.ub0 -= sub;
74006c3fb27SDimitry Andric       }
74106c3fb27SDimitry Andric     } else {
74206c3fb27SDimitry Andric       KMP_ASSERT(bbounds.comparison == comparison_t::comp_greater_or_eq);
74306c3fb27SDimitry Andric       if (old_lb1 < bbounds.lb1) {
74406c3fb27SDimitry Andric         KMP_ASSERT(old_lb1 < 0);
74506c3fb27SDimitry Andric         T sub = (bbounds.lb1 - old_lb1) * previous->span_smallest;
74606c3fb27SDimitry Andric         bbounds.lb0 -= sub;
74706c3fb27SDimitry Andric       } else if (old_lb1 > bbounds.lb1) {
74806c3fb27SDimitry Andric         T add = (old_lb1 - bbounds.lb1) * previous->span_biggest;
74906c3fb27SDimitry Andric         bbounds.lb0 += add;
75006c3fb27SDimitry Andric       }
75106c3fb27SDimitry Andric 
75206c3fb27SDimitry Andric       if (old_ub1 > bbounds.ub1) {
75306c3fb27SDimitry Andric         KMP_ASSERT(old_ub1 > 0);
75406c3fb27SDimitry Andric         T add = (old_ub1 - bbounds.ub1) * previous->span_smallest;
75506c3fb27SDimitry Andric         bbounds.ub0 += add;
75606c3fb27SDimitry Andric       } else if (old_ub1 < bbounds.ub1) {
75706c3fb27SDimitry Andric         T sub = (bbounds.ub1 - old_ub1) * previous->span_biggest;
75806c3fb27SDimitry Andric         bbounds.ub0 -= sub;
75906c3fb27SDimitry Andric       }
76006c3fb27SDimitry Andric     }
76106c3fb27SDimitry Andric   }
76206c3fb27SDimitry Andric }
76306c3fb27SDimitry Andric 
76406c3fb27SDimitry Andric // Do all processing for one canonicalized loop in the nest
76506c3fb27SDimitry Andric // (assuming that outer loops already were processed):
76606c3fb27SDimitry Andric template <typename T>
76706c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_process_one_loop_XX(
76806c3fb27SDimitry Andric     /* in/out*/ bounds_info_internalXX_template<T> *bounds,
76906c3fb27SDimitry Andric     /*in/out*/ bounds_info_internal_t *bounds_nest) {
77006c3fb27SDimitry Andric 
77106c3fb27SDimitry Andric   kmp_calc_new_bounds_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest);
77206c3fb27SDimitry Andric   kmp_calc_span_XX(/* in/out*/ bounds, /* in/out*/ bounds_nest);
77306c3fb27SDimitry Andric   return kmp_calculate_trip_count_XX(/*in/out*/ &(bounds->b));
77406c3fb27SDimitry Andric }
77506c3fb27SDimitry Andric 
77606c3fb27SDimitry Andric // Non-rectangular loop nest, canonicalized to use <= or >=.
77706c3fb27SDimitry Andric // Process loop nest to have a parallelepiped shape,
77806c3fb27SDimitry Andric // calculate biggest spans for IV's on all levels and calculate overall trip
77906c3fb27SDimitry Andric // count. "bounds_nest" has to be allocated per thread.
78006c3fb27SDimitry Andric // Returns overall trip count (for adjusted space).
78106c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_process_loop_nest(
78206c3fb27SDimitry Andric     /*in/out*/ bounds_info_internal_t *bounds_nest, kmp_index_t n) {
78306c3fb27SDimitry Andric 
78406c3fb27SDimitry Andric   kmp_loop_nest_iv_t total = 1;
78506c3fb27SDimitry Andric 
78606c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
78706c3fb27SDimitry Andric     auto bounds = &(bounds_nest[ind]);
78806c3fb27SDimitry Andric     kmp_loop_nest_iv_t trip_count = 0;
78906c3fb27SDimitry Andric 
79006c3fb27SDimitry Andric     switch (bounds->b.loop_type) {
79106c3fb27SDimitry Andric     case loop_type_t::loop_type_int32:
79206c3fb27SDimitry Andric       trip_count = kmp_process_one_loop_XX<kmp_int32>(
79306c3fb27SDimitry Andric           /*in/out*/ (bounds_info_internalXX_template<kmp_int32> *)(bounds),
79406c3fb27SDimitry Andric           /*in/out*/ bounds_nest);
79506c3fb27SDimitry Andric       break;
79606c3fb27SDimitry Andric     case loop_type_t::loop_type_uint32:
79706c3fb27SDimitry Andric       trip_count = kmp_process_one_loop_XX<kmp_uint32>(
79806c3fb27SDimitry Andric           /*in/out*/ (bounds_info_internalXX_template<kmp_uint32> *)(bounds),
79906c3fb27SDimitry Andric           /*in/out*/ bounds_nest);
80006c3fb27SDimitry Andric       break;
80106c3fb27SDimitry Andric     case loop_type_t::loop_type_int64:
80206c3fb27SDimitry Andric       trip_count = kmp_process_one_loop_XX<kmp_int64>(
80306c3fb27SDimitry Andric           /*in/out*/ (bounds_info_internalXX_template<kmp_int64> *)(bounds),
80406c3fb27SDimitry Andric           /*in/out*/ bounds_nest);
80506c3fb27SDimitry Andric       break;
80606c3fb27SDimitry Andric     case loop_type_t::loop_type_uint64:
80706c3fb27SDimitry Andric       trip_count = kmp_process_one_loop_XX<kmp_uint64>(
80806c3fb27SDimitry Andric           /*in/out*/ (bounds_info_internalXX_template<kmp_uint64> *)(bounds),
80906c3fb27SDimitry Andric           /*in/out*/ bounds_nest);
81006c3fb27SDimitry Andric       break;
81106c3fb27SDimitry Andric     default:
81206c3fb27SDimitry Andric       KMP_ASSERT(false);
81306c3fb27SDimitry Andric     }
81406c3fb27SDimitry Andric     total *= trip_count;
81506c3fb27SDimitry Andric   }
81606c3fb27SDimitry Andric 
81706c3fb27SDimitry Andric   return total;
81806c3fb27SDimitry Andric }
81906c3fb27SDimitry Andric 
82006c3fb27SDimitry Andric //----------Calculate iterations (in the original or updated space)-----------
82106c3fb27SDimitry Andric 
82206c3fb27SDimitry Andric // Calculate number of iterations in original or updated space resulting in
82306c3fb27SDimitry Andric // original_ivs[ind] (only on this level, non-negative)
82406c3fb27SDimitry Andric // (not counting initial iteration)
82506c3fb27SDimitry Andric template <typename T>
82606c3fb27SDimitry Andric kmp_loop_nest_iv_t
82706c3fb27SDimitry Andric kmp_calc_number_of_iterations_XX(const bounds_infoXX_template<T> *bounds,
82806c3fb27SDimitry Andric                                  const kmp_point_t original_ivs,
82906c3fb27SDimitry Andric                                  kmp_index_t ind) {
83006c3fb27SDimitry Andric 
83106c3fb27SDimitry Andric   kmp_loop_nest_iv_t iterations = 0;
83206c3fb27SDimitry Andric 
83306c3fb27SDimitry Andric   if (bounds->comparison == comparison_t::comp_less_or_eq) {
83406c3fb27SDimitry Andric     iterations =
83506c3fb27SDimitry Andric         (static_cast<T>(original_ivs[ind]) - bounds->lb0 -
83606c3fb27SDimitry Andric          bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv])) /
83706c3fb27SDimitry Andric         __kmp_abs(bounds->step);
83806c3fb27SDimitry Andric   } else {
83906c3fb27SDimitry Andric     KMP_DEBUG_ASSERT(bounds->comparison == comparison_t::comp_greater_or_eq);
84006c3fb27SDimitry Andric     iterations = (bounds->lb0 +
84106c3fb27SDimitry Andric                   bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv]) -
84206c3fb27SDimitry Andric                   static_cast<T>(original_ivs[ind])) /
84306c3fb27SDimitry Andric                  __kmp_abs(bounds->step);
84406c3fb27SDimitry Andric   }
84506c3fb27SDimitry Andric 
84606c3fb27SDimitry Andric   return iterations;
84706c3fb27SDimitry Andric }
84806c3fb27SDimitry Andric 
84906c3fb27SDimitry Andric // Calculate number of iterations in the original or updated space resulting in
85006c3fb27SDimitry Andric // original_ivs[ind] (only on this level, non-negative)
85106c3fb27SDimitry Andric kmp_loop_nest_iv_t kmp_calc_number_of_iterations(const bounds_info_t *bounds,
85206c3fb27SDimitry Andric                                                  const kmp_point_t original_ivs,
85306c3fb27SDimitry Andric                                                  kmp_index_t ind) {
85406c3fb27SDimitry Andric 
85506c3fb27SDimitry Andric   switch (bounds->loop_type) {
85606c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
85706c3fb27SDimitry Andric     return kmp_calc_number_of_iterations_XX<kmp_int32>(
85806c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int32> *)(bounds), original_ivs, ind);
85906c3fb27SDimitry Andric     break;
86006c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
86106c3fb27SDimitry Andric     return kmp_calc_number_of_iterations_XX<kmp_uint32>(
86206c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint32> *)(bounds), original_ivs, ind);
86306c3fb27SDimitry Andric     break;
86406c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
86506c3fb27SDimitry Andric     return kmp_calc_number_of_iterations_XX<kmp_int64>(
86606c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int64> *)(bounds), original_ivs, ind);
86706c3fb27SDimitry Andric     break;
86806c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
86906c3fb27SDimitry Andric     return kmp_calc_number_of_iterations_XX<kmp_uint64>(
87006c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint64> *)(bounds), original_ivs, ind);
87106c3fb27SDimitry Andric     break;
87206c3fb27SDimitry Andric   default:
87306c3fb27SDimitry Andric     KMP_ASSERT(false);
87406c3fb27SDimitry Andric     return 0;
87506c3fb27SDimitry Andric   }
87606c3fb27SDimitry Andric }
87706c3fb27SDimitry Andric 
87806c3fb27SDimitry Andric //----------Calculate new iv corresponding to original ivs--------------------
87906c3fb27SDimitry Andric 
88006c3fb27SDimitry Andric // We got a point in the original loop nest.
88106c3fb27SDimitry Andric // Take updated bounds and calculate what new_iv will correspond to this point.
88206c3fb27SDimitry Andric // When we are getting original IVs from new_iv, we have to adjust to fit into
88306c3fb27SDimitry Andric // original loops bounds. Getting new_iv for the adjusted original IVs will help
88406c3fb27SDimitry Andric // with making more chunks non-empty.
88506c3fb27SDimitry Andric kmp_loop_nest_iv_t
88606c3fb27SDimitry Andric kmp_calc_new_iv_from_original_ivs(const bounds_info_internal_t *bounds_nest,
88706c3fb27SDimitry Andric                                   const kmp_point_t original_ivs,
88806c3fb27SDimitry Andric                                   kmp_index_t n) {
88906c3fb27SDimitry Andric 
89006c3fb27SDimitry Andric   kmp_loop_nest_iv_t new_iv = 0;
89106c3fb27SDimitry Andric 
89206c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
89306c3fb27SDimitry Andric     auto bounds = &(bounds_nest[ind].b);
89406c3fb27SDimitry Andric 
89506c3fb27SDimitry Andric     new_iv = new_iv * bounds->trip_count +
89606c3fb27SDimitry Andric              kmp_calc_number_of_iterations(bounds, original_ivs, ind);
89706c3fb27SDimitry Andric   }
89806c3fb27SDimitry Andric 
89906c3fb27SDimitry Andric   return new_iv;
90006c3fb27SDimitry Andric }
90106c3fb27SDimitry Andric 
90206c3fb27SDimitry Andric //----------Calculate original ivs for provided iterations--------------------
90306c3fb27SDimitry Andric 
90406c3fb27SDimitry Andric // Calculate original IVs for provided iterations, assuming iterations are
90506c3fb27SDimitry Andric // calculated in the original space.
90606c3fb27SDimitry Andric // Loop nest is in canonical form (with <= / >=).
90706c3fb27SDimitry Andric bool kmp_calc_original_ivs_from_iterations(
90806c3fb27SDimitry Andric     const bounds_info_t *original_bounds_nest, kmp_index_t n,
90906c3fb27SDimitry Andric     /*in/out*/ kmp_point_t original_ivs,
91006c3fb27SDimitry Andric     /*in/out*/ kmp_iterations_t iterations, kmp_index_t ind) {
91106c3fb27SDimitry Andric 
91206c3fb27SDimitry Andric   kmp_index_t lengthened_ind = n;
91306c3fb27SDimitry Andric 
91406c3fb27SDimitry Andric   for (; ind < n;) {
91506c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
91606c3fb27SDimitry Andric     bool good = kmp_calc_one_iv(bounds, /*in/out*/ original_ivs, iterations,
91706c3fb27SDimitry Andric                                 ind, (lengthened_ind < ind), true);
91806c3fb27SDimitry Andric 
91906c3fb27SDimitry Andric     if (!good) {
92006c3fb27SDimitry Andric       // The calculated iv value is too big (or too small for >=):
92106c3fb27SDimitry Andric       if (ind == 0) {
92206c3fb27SDimitry Andric         // Space is empty:
92306c3fb27SDimitry Andric         return false;
92406c3fb27SDimitry Andric       } else {
92506c3fb27SDimitry Andric         // Go to next iteration on the outer loop:
92606c3fb27SDimitry Andric         --ind;
92706c3fb27SDimitry Andric         ++iterations[ind];
92806c3fb27SDimitry Andric         lengthened_ind = ind;
92906c3fb27SDimitry Andric         for (kmp_index_t i = ind + 1; i < n; ++i) {
93006c3fb27SDimitry Andric           iterations[i] = 0;
93106c3fb27SDimitry Andric         }
93206c3fb27SDimitry Andric         continue;
93306c3fb27SDimitry Andric       }
93406c3fb27SDimitry Andric     }
93506c3fb27SDimitry Andric     ++ind;
93606c3fb27SDimitry Andric   }
93706c3fb27SDimitry Andric 
93806c3fb27SDimitry Andric   return true;
93906c3fb27SDimitry Andric }
94006c3fb27SDimitry Andric 
94106c3fb27SDimitry Andric //----------Calculate original ivs for the beginning of the loop nest---------
94206c3fb27SDimitry Andric 
94306c3fb27SDimitry Andric // Calculate IVs for the beginning of the loop nest.
94406c3fb27SDimitry Andric // Note: lower bounds of all loops may not work -
94506c3fb27SDimitry Andric // if on some of the iterations of the outer loops inner loops are empty.
94606c3fb27SDimitry Andric // Loop nest is in canonical form (with <= / >=).
94706c3fb27SDimitry Andric bool kmp_calc_original_ivs_for_start(const bounds_info_t *original_bounds_nest,
94806c3fb27SDimitry Andric                                      kmp_index_t n,
94906c3fb27SDimitry Andric                                      /*out*/ kmp_point_t original_ivs) {
95006c3fb27SDimitry Andric 
95106c3fb27SDimitry Andric   // Iterations in the original space, multiplied by step:
952*5f757f3fSDimitry Andric   CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
95306c3fb27SDimitry Andric   for (kmp_index_t ind = n; ind > 0;) {
95406c3fb27SDimitry Andric     --ind;
95506c3fb27SDimitry Andric     iterations[ind] = 0;
95606c3fb27SDimitry Andric   }
95706c3fb27SDimitry Andric 
95806c3fb27SDimitry Andric   // Now calculate the point:
95906c3fb27SDimitry Andric   bool b = kmp_calc_original_ivs_from_iterations(original_bounds_nest, n,
96006c3fb27SDimitry Andric                                                  /*in/out*/ original_ivs,
96106c3fb27SDimitry Andric                                                  /*in/out*/ iterations, 0);
96206c3fb27SDimitry Andric   return b;
96306c3fb27SDimitry Andric }
96406c3fb27SDimitry Andric 
96506c3fb27SDimitry Andric //----------Calculate next point in the original loop space-------------------
96606c3fb27SDimitry Andric 
96706c3fb27SDimitry Andric // From current set of original IVs calculate next point.
96806c3fb27SDimitry Andric // Return false if there is no next point in the loop bounds.
96906c3fb27SDimitry Andric bool kmp_calc_next_original_ivs(const bounds_info_t *original_bounds_nest,
97006c3fb27SDimitry Andric                                 kmp_index_t n, const kmp_point_t original_ivs,
97106c3fb27SDimitry Andric                                 /*out*/ kmp_point_t next_original_ivs) {
97206c3fb27SDimitry Andric   // Iterations in the original space, multiplied by step (so can be negative):
973*5f757f3fSDimitry Andric   CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
97406c3fb27SDimitry Andric   // First, calc corresponding iteration in every original loop:
97506c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
97606c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
97706c3fb27SDimitry Andric     iterations[ind] = kmp_calc_number_of_iterations(bounds, original_ivs, ind);
97806c3fb27SDimitry Andric   }
97906c3fb27SDimitry Andric 
98006c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
98106c3fb27SDimitry Andric     next_original_ivs[ind] = original_ivs[ind];
98206c3fb27SDimitry Andric   }
98306c3fb27SDimitry Andric 
98406c3fb27SDimitry Andric   // Next add one step to the iterations on the inner-most level, and see if we
98506c3fb27SDimitry Andric   // need to move up the nest:
98606c3fb27SDimitry Andric   kmp_index_t ind = n - 1;
98706c3fb27SDimitry Andric   ++iterations[ind];
98806c3fb27SDimitry Andric 
98906c3fb27SDimitry Andric   bool b = kmp_calc_original_ivs_from_iterations(
99006c3fb27SDimitry Andric       original_bounds_nest, n, /*in/out*/ next_original_ivs, iterations, ind);
99106c3fb27SDimitry Andric 
99206c3fb27SDimitry Andric   return b;
99306c3fb27SDimitry Andric }
99406c3fb27SDimitry Andric 
99506c3fb27SDimitry Andric //----------Calculate chunk end in the original loop space--------------------
99606c3fb27SDimitry Andric 
99706c3fb27SDimitry Andric // For one level calculate old induction variable corresponding to overall
99806c3fb27SDimitry Andric // new_iv for the chunk end.
99906c3fb27SDimitry Andric // Return true if it fits into upper bound on this level
100006c3fb27SDimitry Andric // (if not, we need to re-calculate)
100106c3fb27SDimitry Andric template <typename T>
100206c3fb27SDimitry Andric bool kmp_calc_one_iv_for_chunk_end_XX(
100306c3fb27SDimitry Andric     const bounds_infoXX_template<T> *bounds,
100406c3fb27SDimitry Andric     const bounds_infoXX_template<T> *updated_bounds,
100506c3fb27SDimitry Andric     /*in/out*/ kmp_point_t original_ivs, const kmp_iterations_t iterations,
100606c3fb27SDimitry Andric     kmp_index_t ind, bool start_with_lower_bound, bool compare_with_start,
100706c3fb27SDimitry Andric     const kmp_point_t original_ivs_start) {
100806c3fb27SDimitry Andric 
100906c3fb27SDimitry Andric   // typedef  std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
101006c3fb27SDimitry Andric   // big_span_t;
101106c3fb27SDimitry Andric 
101206c3fb27SDimitry Andric   // OMPTODO: is it good enough, or do we need ST or do we need big_span_t?
101306c3fb27SDimitry Andric   T temp = 0;
101406c3fb27SDimitry Andric 
101506c3fb27SDimitry Andric   T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);
101606c3fb27SDimitry Andric 
101706c3fb27SDimitry Andric   if (start_with_lower_bound) {
101806c3fb27SDimitry Andric     // we moved to the next iteration on one of outer loops, may as well use
101906c3fb27SDimitry Andric     // the lower bound here:
102006c3fb27SDimitry Andric     temp = bounds->lb0 + bounds->lb1 * outer_iv;
102106c3fb27SDimitry Andric   } else {
102206c3fb27SDimitry Andric     // Start in expanded space, but:
102306c3fb27SDimitry Andric     // - we need to hit original space lower bound, so need to account for
102406c3fb27SDimitry Andric     // that
102506c3fb27SDimitry Andric     // - we have to go into original space, even if that means adding more
102606c3fb27SDimitry Andric     // iterations than was planned
102706c3fb27SDimitry Andric     // - we have to go past (or equal to) previous point (which is the chunk
102806c3fb27SDimitry Andric     // starting point)
102906c3fb27SDimitry Andric 
103006c3fb27SDimitry Andric     auto iteration = iterations[ind];
103106c3fb27SDimitry Andric 
103206c3fb27SDimitry Andric     auto step = bounds->step;
103306c3fb27SDimitry Andric 
103406c3fb27SDimitry Andric     // In case of >= it's negative:
103506c3fb27SDimitry Andric     auto accountForStep =
103606c3fb27SDimitry Andric         ((bounds->lb0 + bounds->lb1 * outer_iv) -
103706c3fb27SDimitry Andric          (updated_bounds->lb0 + updated_bounds->lb1 * outer_iv)) %
103806c3fb27SDimitry Andric         step;
103906c3fb27SDimitry Andric 
104006c3fb27SDimitry Andric     temp = updated_bounds->lb0 + updated_bounds->lb1 * outer_iv +
104106c3fb27SDimitry Andric            accountForStep + iteration * step;
104206c3fb27SDimitry Andric 
104306c3fb27SDimitry Andric     if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
104406c3fb27SDimitry Andric          (temp < (bounds->lb0 + bounds->lb1 * outer_iv))) ||
104506c3fb27SDimitry Andric         ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
104606c3fb27SDimitry Andric          (temp > (bounds->lb0 + bounds->lb1 * outer_iv)))) {
104706c3fb27SDimitry Andric       // Too small (or too big), didn't reach the original lower bound. Use
104806c3fb27SDimitry Andric       // heuristic:
104906c3fb27SDimitry Andric       temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration / 2 * step;
105006c3fb27SDimitry Andric     }
105106c3fb27SDimitry Andric 
105206c3fb27SDimitry Andric     if (compare_with_start) {
105306c3fb27SDimitry Andric 
105406c3fb27SDimitry Andric       T start = static_cast<T>(original_ivs_start[ind]);
105506c3fb27SDimitry Andric 
105606c3fb27SDimitry Andric       temp = kmp_fix_iv(bounds->loop_iv_type, temp);
105706c3fb27SDimitry Andric 
105806c3fb27SDimitry Andric       // On all previous levels start of the chunk is same as the end, need to
105906c3fb27SDimitry Andric       // be really careful here:
106006c3fb27SDimitry Andric       if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
106106c3fb27SDimitry Andric            (temp < start)) ||
106206c3fb27SDimitry Andric           ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
106306c3fb27SDimitry Andric            (temp > start))) {
106406c3fb27SDimitry Andric         // End of the chunk can't be smaller (for >= bigger) than it's start.
106506c3fb27SDimitry Andric         // Use heuristic:
106606c3fb27SDimitry Andric         temp = start + iteration / 4 * step;
106706c3fb27SDimitry Andric       }
106806c3fb27SDimitry Andric     }
106906c3fb27SDimitry Andric   }
107006c3fb27SDimitry Andric 
107106c3fb27SDimitry Andric   original_ivs[ind] = temp = kmp_fix_iv(bounds->loop_iv_type, temp);
107206c3fb27SDimitry Andric 
107306c3fb27SDimitry Andric   if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
107406c3fb27SDimitry Andric        (temp > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
107506c3fb27SDimitry Andric       ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
107606c3fb27SDimitry Andric        (temp < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
107706c3fb27SDimitry Andric     // Too big (or too small for >=).
107806c3fb27SDimitry Andric     return false;
107906c3fb27SDimitry Andric   }
108006c3fb27SDimitry Andric 
108106c3fb27SDimitry Andric   return true;
108206c3fb27SDimitry Andric }
108306c3fb27SDimitry Andric 
108406c3fb27SDimitry Andric // For one level calculate old induction variable corresponding to overall
108506c3fb27SDimitry Andric // new_iv for the chunk end.
108606c3fb27SDimitry Andric bool kmp_calc_one_iv_for_chunk_end(const bounds_info_t *bounds,
108706c3fb27SDimitry Andric                                    const bounds_info_t *updated_bounds,
108806c3fb27SDimitry Andric                                    /*in/out*/ kmp_point_t original_ivs,
108906c3fb27SDimitry Andric                                    const kmp_iterations_t iterations,
109006c3fb27SDimitry Andric                                    kmp_index_t ind, bool start_with_lower_bound,
109106c3fb27SDimitry Andric                                    bool compare_with_start,
109206c3fb27SDimitry Andric                                    const kmp_point_t original_ivs_start) {
109306c3fb27SDimitry Andric 
109406c3fb27SDimitry Andric   switch (bounds->loop_type) {
109506c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
109606c3fb27SDimitry Andric     return kmp_calc_one_iv_for_chunk_end_XX<kmp_int32>(
109706c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int32> *)(bounds),
109806c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int32> *)(updated_bounds),
109906c3fb27SDimitry Andric         /*in/out*/
110006c3fb27SDimitry Andric         original_ivs, iterations, ind, start_with_lower_bound,
110106c3fb27SDimitry Andric         compare_with_start, original_ivs_start);
110206c3fb27SDimitry Andric     break;
110306c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
110406c3fb27SDimitry Andric     return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint32>(
110506c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint32> *)(bounds),
110606c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint32> *)(updated_bounds),
110706c3fb27SDimitry Andric         /*in/out*/
110806c3fb27SDimitry Andric         original_ivs, iterations, ind, start_with_lower_bound,
110906c3fb27SDimitry Andric         compare_with_start, original_ivs_start);
111006c3fb27SDimitry Andric     break;
111106c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
111206c3fb27SDimitry Andric     return kmp_calc_one_iv_for_chunk_end_XX<kmp_int64>(
111306c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int64> *)(bounds),
111406c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int64> *)(updated_bounds),
111506c3fb27SDimitry Andric         /*in/out*/
111606c3fb27SDimitry Andric         original_ivs, iterations, ind, start_with_lower_bound,
111706c3fb27SDimitry Andric         compare_with_start, original_ivs_start);
111806c3fb27SDimitry Andric     break;
111906c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
112006c3fb27SDimitry Andric     return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint64>(
112106c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint64> *)(bounds),
112206c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint64> *)(updated_bounds),
112306c3fb27SDimitry Andric         /*in/out*/
112406c3fb27SDimitry Andric         original_ivs, iterations, ind, start_with_lower_bound,
112506c3fb27SDimitry Andric         compare_with_start, original_ivs_start);
112606c3fb27SDimitry Andric     break;
112706c3fb27SDimitry Andric   default:
112806c3fb27SDimitry Andric     KMP_ASSERT(false);
112906c3fb27SDimitry Andric     return false;
113006c3fb27SDimitry Andric   }
113106c3fb27SDimitry Andric }
113206c3fb27SDimitry Andric 
113306c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv for the
113406c3fb27SDimitry Andric // chunk end. If due to space extension we are getting old IVs outside of the
113506c3fb27SDimitry Andric // boundaries, bring them into the boundaries. Need to do this in the runtime,
113606c3fb27SDimitry Andric // esp. on the lower bounds side. When getting result need to make sure that the
113706c3fb27SDimitry Andric // new chunk starts at next position to old chunk, not overlaps with it (this is
113806c3fb27SDimitry Andric // done elsewhere), and need to make sure end of the chunk is further than the
113906c3fb27SDimitry Andric // beginning of the chunk. We don't need an exact ending point here, just
114006c3fb27SDimitry Andric // something more-or-less close to the desired chunk length, bigger is fine
114106c3fb27SDimitry Andric // (smaller would be fine, but we risk going into infinite loop, so do smaller
114206c3fb27SDimitry Andric // only at the very end of the space). result: false if could not find the
114306c3fb27SDimitry Andric // ending point in the original loop space. In this case the caller can use
114406c3fb27SDimitry Andric // original upper bounds as the end of the chunk. Chunk won't be empty, because
114506c3fb27SDimitry Andric // it'll have at least the starting point, which is by construction in the
114606c3fb27SDimitry Andric // original space.
114706c3fb27SDimitry Andric bool kmp_calc_original_ivs_for_chunk_end(
114806c3fb27SDimitry Andric     const bounds_info_t *original_bounds_nest, kmp_index_t n,
114906c3fb27SDimitry Andric     const bounds_info_internal_t *updated_bounds_nest,
115006c3fb27SDimitry Andric     const kmp_point_t original_ivs_start, kmp_loop_nest_iv_t new_iv,
115106c3fb27SDimitry Andric     /*out*/ kmp_point_t original_ivs) {
115206c3fb27SDimitry Andric 
115306c3fb27SDimitry Andric   // Iterations in the expanded space:
1154*5f757f3fSDimitry Andric   CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
115506c3fb27SDimitry Andric   // First, calc corresponding iteration in every modified loop:
115606c3fb27SDimitry Andric   for (kmp_index_t ind = n; ind > 0;) {
115706c3fb27SDimitry Andric     --ind;
115806c3fb27SDimitry Andric     auto &updated_bounds = updated_bounds_nest[ind];
115906c3fb27SDimitry Andric 
116006c3fb27SDimitry Andric     // should be optimized to OPDIVREM:
116106c3fb27SDimitry Andric     auto new_ind = new_iv / updated_bounds.b.trip_count;
116206c3fb27SDimitry Andric     auto iteration = new_iv % updated_bounds.b.trip_count;
116306c3fb27SDimitry Andric 
116406c3fb27SDimitry Andric     new_iv = new_ind;
116506c3fb27SDimitry Andric     iterations[ind] = iteration;
116606c3fb27SDimitry Andric   }
116706c3fb27SDimitry Andric   KMP_DEBUG_ASSERT(new_iv == 0);
116806c3fb27SDimitry Andric 
116906c3fb27SDimitry Andric   kmp_index_t lengthened_ind = n;
117006c3fb27SDimitry Andric   kmp_index_t equal_ind = -1;
117106c3fb27SDimitry Andric 
117206c3fb27SDimitry Andric   // Next calculate the point, but in original loop nest.
117306c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n;) {
117406c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
117506c3fb27SDimitry Andric     auto updated_bounds = &(updated_bounds_nest[ind].b);
117606c3fb27SDimitry Andric 
117706c3fb27SDimitry Andric     bool good = kmp_calc_one_iv_for_chunk_end(
117806c3fb27SDimitry Andric         bounds, updated_bounds,
117906c3fb27SDimitry Andric         /*in/out*/ original_ivs, iterations, ind, (lengthened_ind < ind),
118006c3fb27SDimitry Andric         (equal_ind >= ind - 1), original_ivs_start);
118106c3fb27SDimitry Andric 
118206c3fb27SDimitry Andric     if (!good) {
118306c3fb27SDimitry Andric       // Too big (or too small for >=).
118406c3fb27SDimitry Andric       if (ind == 0) {
118506c3fb27SDimitry Andric         // Need to reduce to the end.
118606c3fb27SDimitry Andric         return false;
118706c3fb27SDimitry Andric       } else {
118806c3fb27SDimitry Andric         // Go to next iteration on outer loop:
118906c3fb27SDimitry Andric         --ind;
119006c3fb27SDimitry Andric         ++(iterations[ind]);
119106c3fb27SDimitry Andric         lengthened_ind = ind;
119206c3fb27SDimitry Andric         if (equal_ind >= lengthened_ind) {
119306c3fb27SDimitry Andric           // We've changed the number of iterations here,
119406c3fb27SDimitry Andric           // can't be same anymore:
119506c3fb27SDimitry Andric           equal_ind = lengthened_ind - 1;
119606c3fb27SDimitry Andric         }
119706c3fb27SDimitry Andric         for (kmp_index_t i = ind + 1; i < n; ++i) {
119806c3fb27SDimitry Andric           iterations[i] = 0;
119906c3fb27SDimitry Andric         }
120006c3fb27SDimitry Andric         continue;
120106c3fb27SDimitry Andric       }
120206c3fb27SDimitry Andric     }
120306c3fb27SDimitry Andric 
120406c3fb27SDimitry Andric     if ((equal_ind == ind - 1) &&
120506c3fb27SDimitry Andric         (kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
120606c3fb27SDimitry Andric                     original_ivs_start[ind]))) {
120706c3fb27SDimitry Andric       equal_ind = ind;
120806c3fb27SDimitry Andric     } else if ((equal_ind > ind - 1) &&
120906c3fb27SDimitry Andric                !(kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
121006c3fb27SDimitry Andric                             original_ivs_start[ind]))) {
121106c3fb27SDimitry Andric       equal_ind = ind - 1;
121206c3fb27SDimitry Andric     }
121306c3fb27SDimitry Andric     ++ind;
121406c3fb27SDimitry Andric   }
121506c3fb27SDimitry Andric 
121606c3fb27SDimitry Andric   return true;
121706c3fb27SDimitry Andric }
121806c3fb27SDimitry Andric 
121906c3fb27SDimitry Andric //----------Calculate upper bounds for the last chunk-------------------------
122006c3fb27SDimitry Andric 
122106c3fb27SDimitry Andric // Calculate one upper bound for the end.
122206c3fb27SDimitry Andric template <typename T>
122306c3fb27SDimitry Andric void kmp_calc_one_iv_end_XX(const bounds_infoXX_template<T> *bounds,
122406c3fb27SDimitry Andric                             /*in/out*/ kmp_point_t original_ivs,
122506c3fb27SDimitry Andric                             kmp_index_t ind) {
122606c3fb27SDimitry Andric 
122706c3fb27SDimitry Andric   T temp = bounds->ub0 +
122806c3fb27SDimitry Andric            bounds->ub1 * static_cast<T>(original_ivs[bounds->outer_iv]);
122906c3fb27SDimitry Andric 
123006c3fb27SDimitry Andric   original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
123106c3fb27SDimitry Andric }
123206c3fb27SDimitry Andric 
123306c3fb27SDimitry Andric void kmp_calc_one_iv_end(const bounds_info_t *bounds,
123406c3fb27SDimitry Andric                          /*in/out*/ kmp_point_t original_ivs, kmp_index_t ind) {
123506c3fb27SDimitry Andric 
123606c3fb27SDimitry Andric   switch (bounds->loop_type) {
123706c3fb27SDimitry Andric   default:
123806c3fb27SDimitry Andric     KMP_ASSERT(false);
123906c3fb27SDimitry Andric     break;
124006c3fb27SDimitry Andric   case loop_type_t::loop_type_int32:
124106c3fb27SDimitry Andric     kmp_calc_one_iv_end_XX<kmp_int32>(
124206c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int32> *)(bounds),
124306c3fb27SDimitry Andric         /*in/out*/ original_ivs, ind);
124406c3fb27SDimitry Andric     break;
124506c3fb27SDimitry Andric   case loop_type_t::loop_type_uint32:
124606c3fb27SDimitry Andric     kmp_calc_one_iv_end_XX<kmp_uint32>(
124706c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint32> *)(bounds),
124806c3fb27SDimitry Andric         /*in/out*/ original_ivs, ind);
124906c3fb27SDimitry Andric     break;
125006c3fb27SDimitry Andric   case loop_type_t::loop_type_int64:
125106c3fb27SDimitry Andric     kmp_calc_one_iv_end_XX<kmp_int64>(
125206c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_int64> *)(bounds),
125306c3fb27SDimitry Andric         /*in/out*/ original_ivs, ind);
125406c3fb27SDimitry Andric     break;
125506c3fb27SDimitry Andric   case loop_type_t::loop_type_uint64:
125606c3fb27SDimitry Andric     kmp_calc_one_iv_end_XX<kmp_uint64>(
125706c3fb27SDimitry Andric         (bounds_infoXX_template<kmp_uint64> *)(bounds),
125806c3fb27SDimitry Andric         /*in/out*/ original_ivs, ind);
125906c3fb27SDimitry Andric     break;
126006c3fb27SDimitry Andric   }
126106c3fb27SDimitry Andric }
126206c3fb27SDimitry Andric 
126306c3fb27SDimitry Andric // Calculate upper bounds for the last loop iteration. Just use original upper
126406c3fb27SDimitry Andric // bounds (adjusted when canonicalized to use <= / >=). No need to check that
126506c3fb27SDimitry Andric // this point is in the original space (it's likely not)
126606c3fb27SDimitry Andric void kmp_calc_original_ivs_for_end(
126706c3fb27SDimitry Andric     const bounds_info_t *const original_bounds_nest, kmp_index_t n,
126806c3fb27SDimitry Andric     /*out*/ kmp_point_t original_ivs) {
126906c3fb27SDimitry Andric   for (kmp_index_t ind = 0; ind < n; ++ind) {
127006c3fb27SDimitry Andric     auto bounds = &(original_bounds_nest[ind]);
127106c3fb27SDimitry Andric     kmp_calc_one_iv_end(bounds, /*in/out*/ original_ivs, ind);
127206c3fb27SDimitry Andric   }
127306c3fb27SDimitry Andric }
127406c3fb27SDimitry Andric 
127506c3fb27SDimitry Andric //----------Init API for non-rectangular loops--------------------------------
127606c3fb27SDimitry Andric 
127706c3fb27SDimitry Andric // Init API for collapsed loops (static, no chunks defined).
127806c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread.
127906c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form
128006c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a
128106c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array
128206c3fb27SDimitry Andric // (only trip counts will be calculated). Internally API will expand the space
128306c3fb27SDimitry Andric // to parallelogram/parallelepiped, calculate total, calculate bounds for the
128406c3fb27SDimitry Andric // chunks in terms of the new IV, re-calc them in terms of old IVs (especially
128506c3fb27SDimitry Andric // important on the left side, to hit the lower bounds and not step over), and
128606c3fb27SDimitry Andric // pick the correct chunk for this thread (so it will calculate chunks up to the
128706c3fb27SDimitry Andric // needed one). It could be optimized to calculate just this chunk, potentially
128806c3fb27SDimitry Andric // a bit less well distributed among threads. It is designed to make sure that
128906c3fb27SDimitry Andric // threads will receive predictable chunks, deterministically (so that next nest
129006c3fb27SDimitry Andric // of loops with similar characteristics will get exactly same chunks on same
129106c3fb27SDimitry Andric // threads).
129206c3fb27SDimitry Andric // Current contract: chunk_bounds_nest has only lb0 and ub0,
129306c3fb27SDimitry Andric // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
129406c3fb27SDimitry Andric extern "C" kmp_int32
129506c3fb27SDimitry Andric __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
129606c3fb27SDimitry Andric                           /*in/out*/ bounds_info_t *original_bounds_nest,
129706c3fb27SDimitry Andric                           /*out*/ bounds_info_t *chunk_bounds_nest,
129806c3fb27SDimitry Andric                           kmp_index_t n, /*out*/ kmp_int32 *plastiter) {
129906c3fb27SDimitry Andric 
130006c3fb27SDimitry Andric   KMP_DEBUG_ASSERT(plastiter && original_bounds_nest);
130106c3fb27SDimitry Andric   KE_TRACE(10, ("__kmpc_for_collapsed_init called (%d)\n", gtid));
130206c3fb27SDimitry Andric 
130306c3fb27SDimitry Andric   if (__kmp_env_consistency_check) {
130406c3fb27SDimitry Andric     __kmp_push_workshare(gtid, ct_pdo, loc);
130506c3fb27SDimitry Andric   }
130606c3fb27SDimitry Andric 
130706c3fb27SDimitry Andric   kmp_canonicalize_loop_nest(loc, /*in/out*/ original_bounds_nest, n);
130806c3fb27SDimitry Andric 
1309*5f757f3fSDimitry Andric   CollapseAllocator<bounds_info_internal_t> updated_bounds_nest(n);
131006c3fb27SDimitry Andric 
131106c3fb27SDimitry Andric   for (kmp_index_t i = 0; i < n; ++i) {
131206c3fb27SDimitry Andric     updated_bounds_nest[i].b = original_bounds_nest[i];
131306c3fb27SDimitry Andric   }
131406c3fb27SDimitry Andric 
131506c3fb27SDimitry Andric   kmp_loop_nest_iv_t total =
131606c3fb27SDimitry Andric       kmp_process_loop_nest(/*in/out*/ updated_bounds_nest, n);
131706c3fb27SDimitry Andric 
131806c3fb27SDimitry Andric   if (plastiter != NULL) {
131906c3fb27SDimitry Andric     *plastiter = FALSE;
132006c3fb27SDimitry Andric   }
132106c3fb27SDimitry Andric 
132206c3fb27SDimitry Andric   if (total == 0) {
132306c3fb27SDimitry Andric     // Loop won't execute:
132406c3fb27SDimitry Andric     return FALSE;
132506c3fb27SDimitry Andric   }
132606c3fb27SDimitry Andric 
132706c3fb27SDimitry Andric   // OMPTODO: DISTRIBUTE is not supported yet
132806c3fb27SDimitry Andric   __kmp_assert_valid_gtid(gtid);
132906c3fb27SDimitry Andric   kmp_uint32 tid = __kmp_tid_from_gtid(gtid);
133006c3fb27SDimitry Andric 
133106c3fb27SDimitry Andric   kmp_info_t *th = __kmp_threads[gtid];
133206c3fb27SDimitry Andric   kmp_team_t *team = th->th.th_team;
133306c3fb27SDimitry Andric   kmp_uint32 nth = team->t.t_nproc; // Number of threads
133406c3fb27SDimitry Andric 
133506c3fb27SDimitry Andric   KMP_DEBUG_ASSERT(tid < nth);
133606c3fb27SDimitry Andric 
1337*5f757f3fSDimitry Andric   CollapseAllocator<kmp_uint64> original_ivs_start(n);
133806c3fb27SDimitry Andric 
133906c3fb27SDimitry Andric   if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n,
134006c3fb27SDimitry Andric                                        /*out*/ original_ivs_start)) {
134106c3fb27SDimitry Andric     // Loop won't execute:
134206c3fb27SDimitry Andric     return FALSE;
134306c3fb27SDimitry Andric   }
134406c3fb27SDimitry Andric 
134506c3fb27SDimitry Andric   // Not doing this optimization for one thread:
134606c3fb27SDimitry Andric   // (1) more to test
134706c3fb27SDimitry Andric   // (2) without it current contract that chunk_bounds_nest has only lb0 and
134806c3fb27SDimitry Andric   // ub0, lb1 and ub1 are set to 0 and can be ignored.
134906c3fb27SDimitry Andric   // if (nth == 1) {
135006c3fb27SDimitry Andric   //  // One thread:
135106c3fb27SDimitry Andric   //  // Copy all info from original_bounds_nest, it'll be good enough.
135206c3fb27SDimitry Andric 
135306c3fb27SDimitry Andric   //  for (kmp_index_t i = 0; i < n; ++i) {
135406c3fb27SDimitry Andric   //    chunk_bounds_nest[i] = original_bounds_nest[i];
135506c3fb27SDimitry Andric   //  }
135606c3fb27SDimitry Andric 
135706c3fb27SDimitry Andric   //  if (plastiter != NULL) {
135806c3fb27SDimitry Andric   //    *plastiter = TRUE;
135906c3fb27SDimitry Andric   //  }
136006c3fb27SDimitry Andric   //  return TRUE;
136106c3fb27SDimitry Andric   //}
136206c3fb27SDimitry Andric 
136306c3fb27SDimitry Andric   kmp_loop_nest_iv_t new_iv = kmp_calc_new_iv_from_original_ivs(
136406c3fb27SDimitry Andric       updated_bounds_nest, original_ivs_start, n);
136506c3fb27SDimitry Andric 
136606c3fb27SDimitry Andric   bool last_iter = false;
136706c3fb27SDimitry Andric 
136806c3fb27SDimitry Andric   for (; nth > 0;) {
136906c3fb27SDimitry Andric     // We could calculate chunk size once, but this is to compensate that the
137006c3fb27SDimitry Andric     // original space is not parallelepiped and some threads can be left
137106c3fb27SDimitry Andric     // without work:
137206c3fb27SDimitry Andric     KMP_DEBUG_ASSERT(total >= new_iv);
137306c3fb27SDimitry Andric 
137406c3fb27SDimitry Andric     kmp_loop_nest_iv_t total_left = total - new_iv;
137506c3fb27SDimitry Andric     kmp_loop_nest_iv_t chunk_size = total_left / nth;
137606c3fb27SDimitry Andric     kmp_loop_nest_iv_t remainder = total_left % nth;
137706c3fb27SDimitry Andric 
137806c3fb27SDimitry Andric     kmp_loop_nest_iv_t curr_chunk_size = chunk_size;
137906c3fb27SDimitry Andric 
138006c3fb27SDimitry Andric     if (remainder > 0) {
138106c3fb27SDimitry Andric       ++curr_chunk_size;
138206c3fb27SDimitry Andric       --remainder;
138306c3fb27SDimitry Andric     }
138406c3fb27SDimitry Andric 
138506c3fb27SDimitry Andric #if defined(KMP_DEBUG)
138606c3fb27SDimitry Andric     kmp_loop_nest_iv_t new_iv_for_start = new_iv;
138706c3fb27SDimitry Andric #endif
138806c3fb27SDimitry Andric 
138906c3fb27SDimitry Andric     if (curr_chunk_size > 1) {
139006c3fb27SDimitry Andric       new_iv += curr_chunk_size - 1;
139106c3fb27SDimitry Andric     }
139206c3fb27SDimitry Andric 
1393*5f757f3fSDimitry Andric     CollapseAllocator<kmp_uint64> original_ivs_end(n);
139406c3fb27SDimitry Andric     if ((nth == 1) || (new_iv >= total - 1)) {
139506c3fb27SDimitry Andric       // Do this one till the end - just in case we miscalculated
139606c3fb27SDimitry Andric       // and either too much is left to process or new_iv is a bit too big:
139706c3fb27SDimitry Andric       kmp_calc_original_ivs_for_end(original_bounds_nest, n,
139806c3fb27SDimitry Andric                                     /*out*/ original_ivs_end);
139906c3fb27SDimitry Andric 
140006c3fb27SDimitry Andric       last_iter = true;
140106c3fb27SDimitry Andric     } else {
140206c3fb27SDimitry Andric       // Note: here we make sure it's past (or equal to) the previous point.
140306c3fb27SDimitry Andric       if (!kmp_calc_original_ivs_for_chunk_end(original_bounds_nest, n,
140406c3fb27SDimitry Andric                                                updated_bounds_nest,
140506c3fb27SDimitry Andric                                                original_ivs_start, new_iv,
140606c3fb27SDimitry Andric                                                /*out*/ original_ivs_end)) {
140706c3fb27SDimitry Andric         // We could not find the ending point, use the original upper bounds:
140806c3fb27SDimitry Andric         kmp_calc_original_ivs_for_end(original_bounds_nest, n,
140906c3fb27SDimitry Andric                                       /*out*/ original_ivs_end);
141006c3fb27SDimitry Andric 
141106c3fb27SDimitry Andric         last_iter = true;
141206c3fb27SDimitry Andric       }
141306c3fb27SDimitry Andric     }
141406c3fb27SDimitry Andric 
141506c3fb27SDimitry Andric #if defined(KMP_DEBUG)
141606c3fb27SDimitry Andric     auto new_iv_for_end = kmp_calc_new_iv_from_original_ivs(
141706c3fb27SDimitry Andric         updated_bounds_nest, original_ivs_end, n);
141806c3fb27SDimitry Andric     KMP_DEBUG_ASSERT(new_iv_for_end >= new_iv_for_start);
141906c3fb27SDimitry Andric #endif
142006c3fb27SDimitry Andric 
142106c3fb27SDimitry Andric     if (last_iter && (tid != 0)) {
142206c3fb27SDimitry Andric       // We are done, this was last chunk, but no chunk for current thread was
142306c3fb27SDimitry Andric       // found:
142406c3fb27SDimitry Andric       return FALSE;
142506c3fb27SDimitry Andric     }
142606c3fb27SDimitry Andric 
142706c3fb27SDimitry Andric     if (tid == 0) {
142806c3fb27SDimitry Andric       // We found the chunk for this thread, now we need to check if it's the
142906c3fb27SDimitry Andric       // last chunk or not:
143006c3fb27SDimitry Andric 
1431*5f757f3fSDimitry Andric       CollapseAllocator<kmp_uint64> original_ivs_next_start(n);
143206c3fb27SDimitry Andric       if (last_iter ||
143306c3fb27SDimitry Andric           !kmp_calc_next_original_ivs(original_bounds_nest, n, original_ivs_end,
143406c3fb27SDimitry Andric                                       /*out*/ original_ivs_next_start)) {
143506c3fb27SDimitry Andric         // no more loop iterations left to process,
143606c3fb27SDimitry Andric         // this means that currently found chunk is the last chunk:
143706c3fb27SDimitry Andric         if (plastiter != NULL) {
143806c3fb27SDimitry Andric           *plastiter = TRUE;
143906c3fb27SDimitry Andric         }
144006c3fb27SDimitry Andric       }
144106c3fb27SDimitry Andric 
144206c3fb27SDimitry Andric       // Fill in chunk bounds:
144306c3fb27SDimitry Andric       for (kmp_index_t i = 0; i < n; ++i) {
144406c3fb27SDimitry Andric         chunk_bounds_nest[i] =
144506c3fb27SDimitry Andric             original_bounds_nest[i]; // To fill in types, etc. - optional
144606c3fb27SDimitry Andric         chunk_bounds_nest[i].lb0_u64 = original_ivs_start[i];
144706c3fb27SDimitry Andric         chunk_bounds_nest[i].lb1_u64 = 0;
144806c3fb27SDimitry Andric 
144906c3fb27SDimitry Andric         chunk_bounds_nest[i].ub0_u64 = original_ivs_end[i];
145006c3fb27SDimitry Andric         chunk_bounds_nest[i].ub1_u64 = 0;
145106c3fb27SDimitry Andric       }
145206c3fb27SDimitry Andric 
145306c3fb27SDimitry Andric       return TRUE;
145406c3fb27SDimitry Andric     }
145506c3fb27SDimitry Andric 
145606c3fb27SDimitry Andric     --tid;
145706c3fb27SDimitry Andric     --nth;
145806c3fb27SDimitry Andric 
145906c3fb27SDimitry Andric     bool next_chunk = kmp_calc_next_original_ivs(
146006c3fb27SDimitry Andric         original_bounds_nest, n, original_ivs_end, /*out*/ original_ivs_start);
146106c3fb27SDimitry Andric     if (!next_chunk) {
146206c3fb27SDimitry Andric       // no more loop iterations to process,
146306c3fb27SDimitry Andric       // the prevoius chunk was the last chunk
146406c3fb27SDimitry Andric       break;
146506c3fb27SDimitry Andric     }
146606c3fb27SDimitry Andric 
146706c3fb27SDimitry Andric     // original_ivs_start is next to previous chunk original_ivs_end,
146806c3fb27SDimitry Andric     // we need to start new chunk here, so chunks will be one after another
146906c3fb27SDimitry Andric     // without any gap or overlap:
147006c3fb27SDimitry Andric     new_iv = kmp_calc_new_iv_from_original_ivs(updated_bounds_nest,
147106c3fb27SDimitry Andric                                                original_ivs_start, n);
147206c3fb27SDimitry Andric   }
147306c3fb27SDimitry Andric 
147406c3fb27SDimitry Andric   return FALSE;
147506c3fb27SDimitry Andric }
1476