xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_dispatch.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric /* Dynamic scheduling initialization and dispatch.
140b57cec5SDimitry Andric  *
150b57cec5SDimitry Andric  * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
160b57cec5SDimitry Andric  *       it may change values between parallel regions.  __kmp_max_nth
170b57cec5SDimitry Andric  *       is the largest value __kmp_nth may take, 1 is the smallest.
180b57cec5SDimitry Andric  */
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #include "kmp.h"
210b57cec5SDimitry Andric #include "kmp_error.h"
220b57cec5SDimitry Andric #include "kmp_i18n.h"
230b57cec5SDimitry Andric #include "kmp_itt.h"
240b57cec5SDimitry Andric #include "kmp_stats.h"
250b57cec5SDimitry Andric #include "kmp_str.h"
260b57cec5SDimitry Andric #if KMP_USE_X87CONTROL
270b57cec5SDimitry Andric #include <float.h>
280b57cec5SDimitry Andric #endif
290b57cec5SDimitry Andric #include "kmp_lock.h"
300b57cec5SDimitry Andric #include "kmp_dispatch.h"
310b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
320b57cec5SDimitry Andric #include "kmp_dispatch_hier.h"
330b57cec5SDimitry Andric #endif
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric #if OMPT_SUPPORT
360b57cec5SDimitry Andric #include "ompt-specific.h"
370b57cec5SDimitry Andric #endif
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
400b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
430b57cec5SDimitry Andric   kmp_info_t *th;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid_ref);
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
480b57cec5SDimitry Andric     th = __kmp_threads[*gtid_ref];
490b57cec5SDimitry Andric     if (th->th.th_root->r.r_active &&
500b57cec5SDimitry Andric         (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
510b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK
520b57cec5SDimitry Andric       __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
530b57cec5SDimitry Andric #else
540b57cec5SDimitry Andric       __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
550b57cec5SDimitry Andric #endif
560b57cec5SDimitry Andric     }
570b57cec5SDimitry Andric   }
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
610b57cec5SDimitry Andric   kmp_info_t *th;
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
640b57cec5SDimitry Andric     th = __kmp_threads[*gtid_ref];
650b57cec5SDimitry Andric     if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
660b57cec5SDimitry Andric       __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
670b57cec5SDimitry Andric     }
680b57cec5SDimitry Andric   }
690b57cec5SDimitry Andric }
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric // Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC
72e8d8bef9SDimitry Andric static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
730b57cec5SDimitry Andric                                          bool use_hier = false) {
740b57cec5SDimitry Andric   // Pick up the nonmonotonic/monotonic bits from the scheduling type
75349cc55cSDimitry Andric   // Nonmonotonic as default for dynamic schedule when no modifier is specified
76349cc55cSDimitry Andric   int monotonicity = SCHEDULE_NONMONOTONIC;
77e8d8bef9SDimitry Andric 
78e8d8bef9SDimitry Andric   // Let default be monotonic for executables
79e8d8bef9SDimitry Andric   // compiled with OpenMP* 4.5 or less compilers
80fe6060f1SDimitry Andric   if (loc != NULL && loc->get_openmp_version() < 50)
810b57cec5SDimitry Andric     monotonicity = SCHEDULE_MONOTONIC;
82e8d8bef9SDimitry Andric 
83fe6060f1SDimitry Andric   if (use_hier || __kmp_force_monotonic)
84e8d8bef9SDimitry Andric     monotonicity = SCHEDULE_MONOTONIC;
85e8d8bef9SDimitry Andric   else if (SCHEDULE_HAS_NONMONOTONIC(schedule))
860b57cec5SDimitry Andric     monotonicity = SCHEDULE_NONMONOTONIC;
870b57cec5SDimitry Andric   else if (SCHEDULE_HAS_MONOTONIC(schedule))
880b57cec5SDimitry Andric     monotonicity = SCHEDULE_MONOTONIC;
89e8d8bef9SDimitry Andric 
900b57cec5SDimitry Andric   return monotonicity;
910b57cec5SDimitry Andric }
920b57cec5SDimitry Andric 
935f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
945f757f3fSDimitry Andric // Return floating point number rounded to two decimal points
static inline float __kmp_round_2decimal_val(float num) {
  // Scale by 100, round the (assumed non-negative) value to the nearest
  // integer by adding 0.5 and truncating, then scale back down.
  int scaled = static_cast<int>(num * 100 + 0.5);
  return scaled / 100.0f;
}
// Round a float to the nearest integer, rounding halfway cases away from zero.
static inline int __kmp_get_round_val(float num) {
  if (num < 0)
    return static_cast<int>(num - 0.5);
  return static_cast<int>(num + 0.5);
}
1015f757f3fSDimitry Andric #endif
1025f757f3fSDimitry Andric 
// Initialize this thread's slice of the chunk space for static-steal
// scheduling: compute the per-thread chunk count and this thread's starting
// chunk index (init) in the global sequence of nchunks chunks.
// team        - the team the thread belongs to
// id          - the thread's id within the nproc scheduling group
// pr          - the thread's private dispatch buffer (read for hybrid info)
// nchunks     - total number of chunks in the loop
// nproc       - number of threads participating in the schedule
// init        - [out] index of this thread's first chunk
// small_chunk - [out] base chunks-per-thread count
// extras      - [out] number of threads that receive one extra chunk
// p_extra     - [out] additional chunks-per-thread for P-core threads
//               (0 on the non-hybrid path and for E-core threads)
template <typename T>
inline void
__kmp_initialize_self_buffer(kmp_team_t *team, T id,
                             dispatch_private_info_template<T> *pr,
                             typename traits_t<T>::unsigned_t nchunks, T nproc,
                             typename traits_t<T>::unsigned_t &init,
                             T &small_chunk, T &extras, T &p_extra) {

#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
  if (pr->flags.use_hybrid) {
    // Hybrid CPU path: chunks were pre-split by __kmp_dispatch_init_algorithm
    // into pchunks for P-core threads and the remainder for E-core threads.
    kmp_info_t *th = __kmp_threads[__kmp_gtid_from_tid((int)id, team)];
    kmp_hw_core_type_t type =
        (kmp_hw_core_type_t)th->th.th_topology_attrs.core_type;
    T pchunks = pr->u.p.pchunks;
    T echunks = nchunks - pchunks;
    T num_procs_with_pcore = pr->u.p.num_procs_with_pcore;
    T num_procs_with_ecore = nproc - num_procs_with_pcore;
    T first_thread_with_ecore = pr->u.p.first_thread_with_ecore;
    T big_chunk =
        pchunks / num_procs_with_pcore; // chunks per thread with p-core
    small_chunk =
        echunks / num_procs_with_ecore; // chunks per thread with e-core

    // Leftover chunks from both integer divisions, handed out one per thread
    // to the lowest-id threads.
    extras =
        (pchunks % num_procs_with_pcore) + (echunks % num_procs_with_ecore);

    // How many more chunks a P-core thread gets than an E-core thread.
    p_extra = (big_chunk - small_chunk);

    // Starting index: id threads precede this one, each owning small_chunk
    // chunks plus p_extra for every preceding P-core thread plus one extra
    // for every preceding thread that got a leftover chunk. The branches
    // count the preceding P-core threads for the two possible layouts
    // (this thread before/after the first E-core thread).
    // NOTE(review): assumes P-core threads occupy the lower ids up to
    // first_thread_with_ecore — confirm against topology assignment.
    if (type == KMP_HW_CORE_TYPE_CORE) {
      if (id < first_thread_with_ecore) {
        init = id * small_chunk + id * p_extra + (id < extras ? id : extras);
      } else {
        init = id * small_chunk + (id - num_procs_with_ecore) * p_extra +
               (id < extras ? id : extras);
      }
    } else {
      if (id == first_thread_with_ecore) {
        init = id * small_chunk + id * p_extra + (id < extras ? id : extras);
      } else {
        init = id * small_chunk + first_thread_with_ecore * p_extra +
               (id < extras ? id : extras);
      }
    }
    // Only P-core threads keep the extra per-thread chunk count.
    p_extra = (type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
    return;
  }
#endif

  // Uniform path: divide chunks evenly; the first (nchunks % nproc) threads
  // each take one extra chunk.
  small_chunk = nchunks / nproc; // chunks per thread
  extras = nchunks % nproc;
  p_extra = 0;
  init = id * small_chunk + (id < extras ? id : extras);
}
1565f757f3fSDimitry Andric 
#if KMP_STATIC_STEAL_ENABLED
// values for steal_flag (possible states of private per-loop buffer);
// "sync" transitions use atomic CAS, "async" transitions are plain stores
enum {
  UNUSED = 0,
  CLAIMED = 1, // owner thread started initialization
  READY = 2, // available for stealing
  THIEF = 3 // finished by owner, or claimed by thief
  // possible state changes:
  // 0 -> 1 owner only, sync
  // 0 -> 3 thief only, sync
  // 1 -> 2 owner only, async
  // 2 -> 3 owner only, async
  // 3 -> 2 owner only, async
  // 3 -> 0 last thread finishing the loop, async
};
#endif
172fe6060f1SDimitry Andric 
1730b57cec5SDimitry Andric // Initialize a dispatch_private_info_template<T> buffer for a particular
1740b57cec5SDimitry Andric // type of schedule,chunk.  The loop description is found in lb (lower bound),
1750b57cec5SDimitry Andric // ub (upper bound), and st (stride).  nproc is the number of threads relevant
1760b57cec5SDimitry Andric // to the scheduling (often the number of threads in a team, but not always if
1770b57cec5SDimitry Andric // hierarchical scheduling is used).  tid is the id of the thread calling
1780b57cec5SDimitry Andric // the function within the group of nproc threads.  It will have a value
1790b57cec5SDimitry Andric // between 0 and nproc - 1.  This is often just the thread id within a team, but
1800b57cec5SDimitry Andric // is not necessarily the case when using hierarchical scheduling.
1810b57cec5SDimitry Andric // loc is the source file location of the corresponding loop
1820b57cec5SDimitry Andric // gtid is the global thread id
1830b57cec5SDimitry Andric template <typename T>
1840b57cec5SDimitry Andric void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
1850b57cec5SDimitry Andric                                    dispatch_private_info_template<T> *pr,
1860b57cec5SDimitry Andric                                    enum sched_type schedule, T lb, T ub,
1870b57cec5SDimitry Andric                                    typename traits_t<T>::signed_t st,
1880b57cec5SDimitry Andric #if USE_ITT_BUILD
1890b57cec5SDimitry Andric                                    kmp_uint64 *cur_chunk,
1900b57cec5SDimitry Andric #endif
1910b57cec5SDimitry Andric                                    typename traits_t<T>::signed_t chunk,
1920b57cec5SDimitry Andric                                    T nproc, T tid) {
1930b57cec5SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
1940b57cec5SDimitry Andric   typedef typename traits_t<T>::floating_t DBL;
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric   int active;
1970b57cec5SDimitry Andric   T tc;
1980b57cec5SDimitry Andric   kmp_info_t *th;
1990b57cec5SDimitry Andric   kmp_team_t *team;
2000b57cec5SDimitry Andric   int monotonicity;
2010b57cec5SDimitry Andric   bool use_hier;
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric #ifdef KMP_DEBUG
2040b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
2050b57cec5SDimitry Andric   {
2060b57cec5SDimitry Andric     char *buff;
2070b57cec5SDimitry Andric     // create format specifiers before the debug output
2080b57cec5SDimitry Andric     buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called "
2090b57cec5SDimitry Andric                             "pr:%%p lb:%%%s ub:%%%s st:%%%s "
2100b57cec5SDimitry Andric                             "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
2110b57cec5SDimitry Andric                             traits_t<T>::spec, traits_t<T>::spec,
2120b57cec5SDimitry Andric                             traits_t<ST>::spec, traits_t<ST>::spec,
2130b57cec5SDimitry Andric                             traits_t<T>::spec, traits_t<T>::spec);
2140b57cec5SDimitry Andric     KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
2150b57cec5SDimitry Andric     __kmp_str_free(&buff);
2160b57cec5SDimitry Andric   }
2170b57cec5SDimitry Andric #endif
2180b57cec5SDimitry Andric   /* setup data */
2190b57cec5SDimitry Andric   th = __kmp_threads[gtid];
2200b57cec5SDimitry Andric   team = th->th.th_team;
2210b57cec5SDimitry Andric   active = !team->t.t_serialized;
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric #if USE_ITT_BUILD
2240b57cec5SDimitry Andric   int itt_need_metadata_reporting =
2250b57cec5SDimitry Andric       __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
2260b57cec5SDimitry Andric       KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
2270b57cec5SDimitry Andric       team->t.t_active_level == 1;
2280b57cec5SDimitry Andric #endif
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
2310b57cec5SDimitry Andric   use_hier = pr->flags.use_hier;
2320b57cec5SDimitry Andric #else
2330b57cec5SDimitry Andric   use_hier = false;
2340b57cec5SDimitry Andric #endif
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric   /* Pick up the nonmonotonic/monotonic bits from the scheduling type */
237e8d8bef9SDimitry Andric   monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
2380b57cec5SDimitry Andric   schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   /* Pick up the nomerge/ordered bits from the scheduling type */
2410b57cec5SDimitry Andric   if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
2420b57cec5SDimitry Andric     pr->flags.nomerge = TRUE;
2430b57cec5SDimitry Andric     schedule =
2440b57cec5SDimitry Andric         (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
2450b57cec5SDimitry Andric   } else {
2460b57cec5SDimitry Andric     pr->flags.nomerge = FALSE;
2470b57cec5SDimitry Andric   }
2480b57cec5SDimitry Andric   pr->type_size = traits_t<T>::type_size; // remember the size of variables
2490b57cec5SDimitry Andric   if (kmp_ord_lower & schedule) {
2500b57cec5SDimitry Andric     pr->flags.ordered = TRUE;
2510b57cec5SDimitry Andric     schedule =
2520b57cec5SDimitry Andric         (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
2530b57cec5SDimitry Andric   } else {
2540b57cec5SDimitry Andric     pr->flags.ordered = FALSE;
2550b57cec5SDimitry Andric   }
2560b57cec5SDimitry Andric   // Ordered overrides nonmonotonic
2570b57cec5SDimitry Andric   if (pr->flags.ordered) {
2580b57cec5SDimitry Andric     monotonicity = SCHEDULE_MONOTONIC;
2590b57cec5SDimitry Andric   }
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   if (schedule == kmp_sch_static) {
2620b57cec5SDimitry Andric     schedule = __kmp_static;
2630b57cec5SDimitry Andric   } else {
2640b57cec5SDimitry Andric     if (schedule == kmp_sch_runtime) {
2650b57cec5SDimitry Andric       // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if
2660b57cec5SDimitry Andric       // not specified)
2670b57cec5SDimitry Andric       schedule = team->t.t_sched.r_sched_type;
268e8d8bef9SDimitry Andric       monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
2690b57cec5SDimitry Andric       schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
270fe6060f1SDimitry Andric       if (pr->flags.ordered) // correct monotonicity for ordered loop if needed
271fe6060f1SDimitry Andric         monotonicity = SCHEDULE_MONOTONIC;
2720b57cec5SDimitry Andric       // Detail the schedule if needed (global controls are differentiated
2730b57cec5SDimitry Andric       // appropriately)
2740b57cec5SDimitry Andric       if (schedule == kmp_sch_guided_chunked) {
2750b57cec5SDimitry Andric         schedule = __kmp_guided;
2760b57cec5SDimitry Andric       } else if (schedule == kmp_sch_static) {
2770b57cec5SDimitry Andric         schedule = __kmp_static;
2780b57cec5SDimitry Andric       }
2790b57cec5SDimitry Andric       // Use the chunk size specified by OMP_SCHEDULE (or default if not
2800b57cec5SDimitry Andric       // specified)
2810b57cec5SDimitry Andric       chunk = team->t.t_sched.chunk;
2820b57cec5SDimitry Andric #if USE_ITT_BUILD
2830b57cec5SDimitry Andric       if (cur_chunk)
2840b57cec5SDimitry Andric         *cur_chunk = chunk;
2850b57cec5SDimitry Andric #endif
2860b57cec5SDimitry Andric #ifdef KMP_DEBUG
2870b57cec5SDimitry Andric       {
2880b57cec5SDimitry Andric         char *buff;
2890b57cec5SDimitry Andric         // create format specifiers before the debug output
2900b57cec5SDimitry Andric         buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: "
2910b57cec5SDimitry Andric                                 "schedule:%%d chunk:%%%s\n",
2920b57cec5SDimitry Andric                                 traits_t<ST>::spec);
2930b57cec5SDimitry Andric         KD_TRACE(10, (buff, gtid, schedule, chunk));
2940b57cec5SDimitry Andric         __kmp_str_free(&buff);
2950b57cec5SDimitry Andric       }
2960b57cec5SDimitry Andric #endif
2970b57cec5SDimitry Andric     } else {
2980b57cec5SDimitry Andric       if (schedule == kmp_sch_guided_chunked) {
2990b57cec5SDimitry Andric         schedule = __kmp_guided;
3000b57cec5SDimitry Andric       }
3010b57cec5SDimitry Andric       if (chunk <= 0) {
3020b57cec5SDimitry Andric         chunk = KMP_DEFAULT_CHUNK;
3030b57cec5SDimitry Andric       }
3040b57cec5SDimitry Andric     }
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric     if (schedule == kmp_sch_auto) {
3070b57cec5SDimitry Andric       // mapping and differentiation: in the __kmp_do_serial_initialize()
3080b57cec5SDimitry Andric       schedule = __kmp_auto;
3090b57cec5SDimitry Andric #ifdef KMP_DEBUG
3100b57cec5SDimitry Andric       {
3110b57cec5SDimitry Andric         char *buff;
3120b57cec5SDimitry Andric         // create format specifiers before the debug output
3130b57cec5SDimitry Andric         buff = __kmp_str_format(
3140b57cec5SDimitry Andric             "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
3150b57cec5SDimitry Andric             "schedule:%%d chunk:%%%s\n",
3160b57cec5SDimitry Andric             traits_t<ST>::spec);
3170b57cec5SDimitry Andric         KD_TRACE(10, (buff, gtid, schedule, chunk));
3180b57cec5SDimitry Andric         __kmp_str_free(&buff);
3190b57cec5SDimitry Andric       }
3200b57cec5SDimitry Andric #endif
3210b57cec5SDimitry Andric     }
3220b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED
3230b57cec5SDimitry Andric     // map nonmonotonic:dynamic to static steal
3240b57cec5SDimitry Andric     if (schedule == kmp_sch_dynamic_chunked) {
3250b57cec5SDimitry Andric       if (monotonicity == SCHEDULE_NONMONOTONIC)
3260b57cec5SDimitry Andric         schedule = kmp_sch_static_steal;
3270b57cec5SDimitry Andric     }
3280b57cec5SDimitry Andric #endif
3290b57cec5SDimitry Andric     /* guided analytical not safe for too many threads */
3300b57cec5SDimitry Andric     if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
3310b57cec5SDimitry Andric       schedule = kmp_sch_guided_iterative_chunked;
3320b57cec5SDimitry Andric       KMP_WARNING(DispatchManyThreads);
3330b57cec5SDimitry Andric     }
3340b57cec5SDimitry Andric     if (schedule == kmp_sch_runtime_simd) {
3350b57cec5SDimitry Andric       // compiler provides simd_width in the chunk parameter
3360b57cec5SDimitry Andric       schedule = team->t.t_sched.r_sched_type;
337e8d8bef9SDimitry Andric       monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
3380b57cec5SDimitry Andric       schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
3390b57cec5SDimitry Andric       // Detail the schedule if needed (global controls are differentiated
3400b57cec5SDimitry Andric       // appropriately)
3410b57cec5SDimitry Andric       if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
3420b57cec5SDimitry Andric           schedule == __kmp_static) {
3430b57cec5SDimitry Andric         schedule = kmp_sch_static_balanced_chunked;
3440b57cec5SDimitry Andric       } else {
3450b57cec5SDimitry Andric         if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
3460b57cec5SDimitry Andric           schedule = kmp_sch_guided_simd;
3470b57cec5SDimitry Andric         }
3480b57cec5SDimitry Andric         chunk = team->t.t_sched.chunk * chunk;
3490b57cec5SDimitry Andric       }
3500b57cec5SDimitry Andric #if USE_ITT_BUILD
3510b57cec5SDimitry Andric       if (cur_chunk)
3520b57cec5SDimitry Andric         *cur_chunk = chunk;
3530b57cec5SDimitry Andric #endif
3540b57cec5SDimitry Andric #ifdef KMP_DEBUG
3550b57cec5SDimitry Andric       {
3560b57cec5SDimitry Andric         char *buff;
3570b57cec5SDimitry Andric         // create format specifiers before the debug output
3580b57cec5SDimitry Andric         buff = __kmp_str_format(
3590b57cec5SDimitry Andric             "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
3600b57cec5SDimitry Andric             " chunk:%%%s\n",
3610b57cec5SDimitry Andric             traits_t<ST>::spec);
3620b57cec5SDimitry Andric         KD_TRACE(10, (buff, gtid, schedule, chunk));
3630b57cec5SDimitry Andric         __kmp_str_free(&buff);
3640b57cec5SDimitry Andric       }
3650b57cec5SDimitry Andric #endif
3660b57cec5SDimitry Andric     }
3670b57cec5SDimitry Andric     pr->u.p.parm1 = chunk;
3680b57cec5SDimitry Andric   }
3690b57cec5SDimitry Andric   KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
3700b57cec5SDimitry Andric               "unknown scheduling type");
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric   pr->u.p.count = 0;
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
3750b57cec5SDimitry Andric     if (st == 0) {
3760b57cec5SDimitry Andric       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
3770b57cec5SDimitry Andric                             (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
3780b57cec5SDimitry Andric     }
3790b57cec5SDimitry Andric   }
3800b57cec5SDimitry Andric   // compute trip count
3810b57cec5SDimitry Andric   if (st == 1) { // most common case
3820b57cec5SDimitry Andric     if (ub >= lb) {
3830b57cec5SDimitry Andric       tc = ub - lb + 1;
3840b57cec5SDimitry Andric     } else { // ub < lb
3850b57cec5SDimitry Andric       tc = 0; // zero-trip
3860b57cec5SDimitry Andric     }
3870b57cec5SDimitry Andric   } else if (st < 0) {
3880b57cec5SDimitry Andric     if (lb >= ub) {
3890b57cec5SDimitry Andric       // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
3900b57cec5SDimitry Andric       // where the division needs to be unsigned regardless of the result type
3910b57cec5SDimitry Andric       tc = (UT)(lb - ub) / (-st) + 1;
3920b57cec5SDimitry Andric     } else { // lb < ub
3930b57cec5SDimitry Andric       tc = 0; // zero-trip
3940b57cec5SDimitry Andric     }
3950b57cec5SDimitry Andric   } else { // st > 0
3960b57cec5SDimitry Andric     if (ub >= lb) {
3970b57cec5SDimitry Andric       // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
3980b57cec5SDimitry Andric       // where the division needs to be unsigned regardless of the result type
3990b57cec5SDimitry Andric       tc = (UT)(ub - lb) / st + 1;
4000b57cec5SDimitry Andric     } else { // ub < lb
4010b57cec5SDimitry Andric       tc = 0; // zero-trip
4020b57cec5SDimitry Andric     }
4030b57cec5SDimitry Andric   }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric #if KMP_STATS_ENABLED
4060b57cec5SDimitry Andric   if (KMP_MASTER_GTID(gtid)) {
4070b57cec5SDimitry Andric     KMP_COUNT_VALUE(OMP_loop_dynamic_total_iterations, tc);
4080b57cec5SDimitry Andric   }
4090b57cec5SDimitry Andric #endif
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric   pr->u.p.lb = lb;
4120b57cec5SDimitry Andric   pr->u.p.ub = ub;
4130b57cec5SDimitry Andric   pr->u.p.st = st;
4140b57cec5SDimitry Andric   pr->u.p.tc = tc;
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric #if KMP_OS_WINDOWS
4170b57cec5SDimitry Andric   pr->u.p.last_upper = ub + st;
4180b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
4190b57cec5SDimitry Andric 
4200b57cec5SDimitry Andric   /* NOTE: only the active parallel region(s) has active ordered sections */
4210b57cec5SDimitry Andric 
4220b57cec5SDimitry Andric   if (active) {
4230b57cec5SDimitry Andric     if (pr->flags.ordered) {
4240b57cec5SDimitry Andric       pr->ordered_bumped = 0;
4250b57cec5SDimitry Andric       pr->u.p.ordered_lower = 1;
4260b57cec5SDimitry Andric       pr->u.p.ordered_upper = 0;
4270b57cec5SDimitry Andric     }
4280b57cec5SDimitry Andric   }
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric   switch (schedule) {
431fe6060f1SDimitry Andric #if KMP_STATIC_STEAL_ENABLED
4320b57cec5SDimitry Andric   case kmp_sch_static_steal: {
4335f757f3fSDimitry Andric     T ntc, init = 0;
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric     KD_TRACE(100,
4360b57cec5SDimitry Andric              ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
4370b57cec5SDimitry Andric               gtid));
4380b57cec5SDimitry Andric 
4390b57cec5SDimitry Andric     ntc = (tc % chunk ? 1 : 0) + tc / chunk;
4400b57cec5SDimitry Andric     if (nproc > 1 && ntc >= nproc) {
4410b57cec5SDimitry Andric       KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL);
4420b57cec5SDimitry Andric       T id = tid;
4435f757f3fSDimitry Andric       T small_chunk, extras, p_extra = 0;
444fe6060f1SDimitry Andric       kmp_uint32 old = UNUSED;
445fe6060f1SDimitry Andric       int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED);
446fe6060f1SDimitry Andric       if (traits_t<T>::type_size > 4) {
447fe6060f1SDimitry Andric         // AC: TODO: check if 16-byte CAS available and use it to
448fe6060f1SDimitry Andric         // improve performance (probably wait for explicit request
449fe6060f1SDimitry Andric         // before spending time on this).
450fe6060f1SDimitry Andric         // For now use dynamically allocated per-private-buffer lock,
451fe6060f1SDimitry Andric         // free memory in __kmp_dispatch_next when status==0.
452fe6060f1SDimitry Andric         pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
453fe6060f1SDimitry Andric         __kmp_init_lock(pr->u.p.steal_lock);
454fe6060f1SDimitry Andric       }
4555f757f3fSDimitry Andric 
4565f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
4575f757f3fSDimitry Andric       // Iterations are divided in a 60/40 skewed distribution among CORE and
4585f757f3fSDimitry Andric       // ATOM processors for hybrid systems
4595f757f3fSDimitry Andric       bool use_hybrid = false;
4605f757f3fSDimitry Andric       kmp_hw_core_type_t core_type = KMP_HW_CORE_TYPE_UNKNOWN;
4615f757f3fSDimitry Andric       T first_thread_with_ecore = 0;
4625f757f3fSDimitry Andric       T num_procs_with_pcore = 0;
4635f757f3fSDimitry Andric       T num_procs_with_ecore = 0;
4645f757f3fSDimitry Andric       T p_ntc = 0, e_ntc = 0;
4655f757f3fSDimitry Andric       if (__kmp_is_hybrid_cpu() && __kmp_affinity.type != affinity_none &&
4665f757f3fSDimitry Andric           __kmp_affinity.type != affinity_explicit) {
4675f757f3fSDimitry Andric         use_hybrid = true;
4685f757f3fSDimitry Andric         core_type = (kmp_hw_core_type_t)th->th.th_topology_attrs.core_type;
4695f757f3fSDimitry Andric         if (core_type != KMP_HW_CORE_TYPE_UNKNOWN &&
4705f757f3fSDimitry Andric             __kmp_first_osid_with_ecore > -1) {
4715f757f3fSDimitry Andric           for (int i = 0; i < team->t.t_nproc; ++i) {
4725f757f3fSDimitry Andric             kmp_hw_core_type_t type = (kmp_hw_core_type_t)team->t.t_threads[i]
4735f757f3fSDimitry Andric                                           ->th.th_topology_attrs.core_type;
4745f757f3fSDimitry Andric             int id = team->t.t_threads[i]->th.th_topology_ids.os_id;
4755f757f3fSDimitry Andric             if (id == __kmp_first_osid_with_ecore) {
4765f757f3fSDimitry Andric               first_thread_with_ecore =
4775f757f3fSDimitry Andric                   team->t.t_threads[i]->th.th_info.ds.ds_tid;
4785f757f3fSDimitry Andric             }
4795f757f3fSDimitry Andric             if (type == KMP_HW_CORE_TYPE_CORE) {
4805f757f3fSDimitry Andric               num_procs_with_pcore++;
4815f757f3fSDimitry Andric             } else if (type == KMP_HW_CORE_TYPE_ATOM) {
4825f757f3fSDimitry Andric               num_procs_with_ecore++;
4835f757f3fSDimitry Andric             } else {
4845f757f3fSDimitry Andric               use_hybrid = false;
4855f757f3fSDimitry Andric               break;
4865f757f3fSDimitry Andric             }
4875f757f3fSDimitry Andric           }
4885f757f3fSDimitry Andric         }
4895f757f3fSDimitry Andric         if (num_procs_with_pcore > 0 && num_procs_with_ecore > 0) {
4905f757f3fSDimitry Andric           float multiplier = 60.0 / 40.0;
4915f757f3fSDimitry Andric           float p_ratio = (float)num_procs_with_pcore / nproc;
4925f757f3fSDimitry Andric           float e_ratio = (float)num_procs_with_ecore / nproc;
4935f757f3fSDimitry Andric           float e_multiplier =
4945f757f3fSDimitry Andric               (float)1 /
4955f757f3fSDimitry Andric               (((multiplier * num_procs_with_pcore) / nproc) + e_ratio);
4965f757f3fSDimitry Andric           float p_multiplier = multiplier * e_multiplier;
4975f757f3fSDimitry Andric           p_ntc = __kmp_get_round_val(ntc * p_ratio * p_multiplier);
4985f757f3fSDimitry Andric           if ((int)p_ntc > (int)(ntc * p_ratio * p_multiplier))
4995f757f3fSDimitry Andric             e_ntc =
5005f757f3fSDimitry Andric                 (int)(__kmp_round_2decimal_val(ntc * e_ratio * e_multiplier));
5015f757f3fSDimitry Andric           else
5025f757f3fSDimitry Andric             e_ntc = __kmp_get_round_val(ntc * e_ratio * e_multiplier);
5035f757f3fSDimitry Andric           KMP_DEBUG_ASSERT(ntc == p_ntc + e_ntc);
5045f757f3fSDimitry Andric 
5055f757f3fSDimitry Andric           // Use regular static steal if not enough chunks for skewed
5065f757f3fSDimitry Andric           // distribution
5075f757f3fSDimitry Andric           use_hybrid = (use_hybrid && (p_ntc >= num_procs_with_pcore &&
5085f757f3fSDimitry Andric                                        e_ntc >= num_procs_with_ecore)
5095f757f3fSDimitry Andric                             ? true
5105f757f3fSDimitry Andric                             : false);
5115f757f3fSDimitry Andric         } else {
5125f757f3fSDimitry Andric           use_hybrid = false;
5135f757f3fSDimitry Andric         }
5145f757f3fSDimitry Andric       }
5155f757f3fSDimitry Andric       pr->flags.use_hybrid = use_hybrid;
5165f757f3fSDimitry Andric       pr->u.p.pchunks = p_ntc;
5175f757f3fSDimitry Andric       pr->u.p.num_procs_with_pcore = num_procs_with_pcore;
5185f757f3fSDimitry Andric       pr->u.p.first_thread_with_ecore = first_thread_with_ecore;
5195f757f3fSDimitry Andric 
5205f757f3fSDimitry Andric       if (use_hybrid) {
5215f757f3fSDimitry Andric         KMP_DEBUG_ASSERT(nproc == num_procs_with_pcore + num_procs_with_ecore);
5225f757f3fSDimitry Andric         T big_chunk = p_ntc / num_procs_with_pcore;
5235f757f3fSDimitry Andric         small_chunk = e_ntc / num_procs_with_ecore;
5245f757f3fSDimitry Andric 
5255f757f3fSDimitry Andric         extras =
5265f757f3fSDimitry Andric             (p_ntc % num_procs_with_pcore) + (e_ntc % num_procs_with_ecore);
5275f757f3fSDimitry Andric 
5285f757f3fSDimitry Andric         p_extra = (big_chunk - small_chunk);
5295f757f3fSDimitry Andric 
5305f757f3fSDimitry Andric         if (core_type == KMP_HW_CORE_TYPE_CORE) {
5315f757f3fSDimitry Andric           if (id < first_thread_with_ecore) {
5325f757f3fSDimitry Andric             init =
5335f757f3fSDimitry Andric                 id * small_chunk + id * p_extra + (id < extras ? id : extras);
5345f757f3fSDimitry Andric           } else {
5355f757f3fSDimitry Andric             init = id * small_chunk + (id - num_procs_with_ecore) * p_extra +
5365f757f3fSDimitry Andric                    (id < extras ? id : extras);
5375f757f3fSDimitry Andric           }
5385f757f3fSDimitry Andric         } else {
5395f757f3fSDimitry Andric           if (id == first_thread_with_ecore) {
5405f757f3fSDimitry Andric             init =
5415f757f3fSDimitry Andric                 id * small_chunk + id * p_extra + (id < extras ? id : extras);
5425f757f3fSDimitry Andric           } else {
5435f757f3fSDimitry Andric             init = id * small_chunk + first_thread_with_ecore * p_extra +
5445f757f3fSDimitry Andric                    (id < extras ? id : extras);
5455f757f3fSDimitry Andric           }
5465f757f3fSDimitry Andric         }
5475f757f3fSDimitry Andric         p_extra = (core_type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
5485f757f3fSDimitry Andric       } else
5495f757f3fSDimitry Andric #endif
5505f757f3fSDimitry Andric       {
5510b57cec5SDimitry Andric         small_chunk = ntc / nproc;
5520b57cec5SDimitry Andric         extras = ntc % nproc;
5530b57cec5SDimitry Andric         init = id * small_chunk + (id < extras ? id : extras);
5545f757f3fSDimitry Andric         p_extra = 0;
5555f757f3fSDimitry Andric       }
5560b57cec5SDimitry Andric       pr->u.p.count = init;
557fe6060f1SDimitry Andric       if (claimed) { // are we succeeded in claiming own buffer?
5585f757f3fSDimitry Andric         pr->u.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0);
559fe6060f1SDimitry Andric         // Other threads will inspect steal_flag when searching for a victim.
560fe6060f1SDimitry Andric         // READY means other threads may steal from this thread from now on.
561fe6060f1SDimitry Andric         KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
562fe6060f1SDimitry Andric       } else {
563fe6060f1SDimitry Andric         // other thread has stolen whole our range
564fe6060f1SDimitry Andric         KMP_DEBUG_ASSERT(pr->steal_flag == THIEF);
565fe6060f1SDimitry Andric         pr->u.p.ub = init; // mark there is no iterations to work on
5660b57cec5SDimitry Andric       }
567fe6060f1SDimitry Andric       pr->u.p.parm2 = ntc; // save number of chunks
568fe6060f1SDimitry Andric       // parm3 is the number of times to attempt stealing which is
569fe6060f1SDimitry Andric       // nproc (just a heuristics, could be optimized later on).
570fe6060f1SDimitry Andric       pr->u.p.parm3 = nproc;
571fe6060f1SDimitry Andric       pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
5720b57cec5SDimitry Andric       break;
5730b57cec5SDimitry Andric     } else {
574480093f4SDimitry Andric       /* too few chunks: switching to kmp_sch_dynamic_chunked */
575480093f4SDimitry Andric       schedule = kmp_sch_dynamic_chunked;
576480093f4SDimitry Andric       KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
577480093f4SDimitry Andric                      "kmp_sch_dynamic_chunked\n",
5780b57cec5SDimitry Andric                      gtid));
579fe6060f1SDimitry Andric       goto dynamic_init;
580480093f4SDimitry Andric       break;
5810b57cec5SDimitry Andric     } // if
5820b57cec5SDimitry Andric   } // case
5830b57cec5SDimitry Andric #endif
5840b57cec5SDimitry Andric   case kmp_sch_static_balanced: {
5850b57cec5SDimitry Andric     T init, limit;
5860b57cec5SDimitry Andric 
5870b57cec5SDimitry Andric     KD_TRACE(
5880b57cec5SDimitry Andric         100,
5890b57cec5SDimitry Andric         ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
5900b57cec5SDimitry Andric          gtid));
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric     if (nproc > 1) {
5930b57cec5SDimitry Andric       T id = tid;
5940b57cec5SDimitry Andric 
5950b57cec5SDimitry Andric       if (tc < nproc) {
5960b57cec5SDimitry Andric         if (id < tc) {
5970b57cec5SDimitry Andric           init = id;
5980b57cec5SDimitry Andric           limit = id;
5990b57cec5SDimitry Andric           pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
6000b57cec5SDimitry Andric         } else {
6010b57cec5SDimitry Andric           pr->u.p.count = 1; /* means no more chunks to execute */
6020b57cec5SDimitry Andric           pr->u.p.parm1 = FALSE;
6030b57cec5SDimitry Andric           break;
6040b57cec5SDimitry Andric         }
6050b57cec5SDimitry Andric       } else {
6060b57cec5SDimitry Andric         T small_chunk = tc / nproc;
6070b57cec5SDimitry Andric         T extras = tc % nproc;
6080b57cec5SDimitry Andric         init = id * small_chunk + (id < extras ? id : extras);
6090b57cec5SDimitry Andric         limit = init + small_chunk - (id < extras ? 0 : 1);
6100b57cec5SDimitry Andric         pr->u.p.parm1 = (id == nproc - 1);
6110b57cec5SDimitry Andric       }
6120b57cec5SDimitry Andric     } else {
6130b57cec5SDimitry Andric       if (tc > 0) {
6140b57cec5SDimitry Andric         init = 0;
6150b57cec5SDimitry Andric         limit = tc - 1;
6160b57cec5SDimitry Andric         pr->u.p.parm1 = TRUE;
6170b57cec5SDimitry Andric       } else {
6180b57cec5SDimitry Andric         // zero trip count
6190b57cec5SDimitry Andric         pr->u.p.count = 1; /* means no more chunks to execute */
6200b57cec5SDimitry Andric         pr->u.p.parm1 = FALSE;
6210b57cec5SDimitry Andric         break;
6220b57cec5SDimitry Andric       }
6230b57cec5SDimitry Andric     }
6240b57cec5SDimitry Andric #if USE_ITT_BUILD
6250b57cec5SDimitry Andric     // Calculate chunk for metadata report
6260b57cec5SDimitry Andric     if (itt_need_metadata_reporting)
6270b57cec5SDimitry Andric       if (cur_chunk)
6280b57cec5SDimitry Andric         *cur_chunk = limit - init + 1;
6290b57cec5SDimitry Andric #endif
6300b57cec5SDimitry Andric     if (st == 1) {
6310b57cec5SDimitry Andric       pr->u.p.lb = lb + init;
6320b57cec5SDimitry Andric       pr->u.p.ub = lb + limit;
6330b57cec5SDimitry Andric     } else {
6340b57cec5SDimitry Andric       // calculated upper bound, "ub" is user-defined upper bound
6350b57cec5SDimitry Andric       T ub_tmp = lb + limit * st;
6360b57cec5SDimitry Andric       pr->u.p.lb = lb + init * st;
6370b57cec5SDimitry Andric       // adjust upper bound to "ub" if needed, so that MS lastprivate will match
6380b57cec5SDimitry Andric       // it exactly
6390b57cec5SDimitry Andric       if (st > 0) {
6400b57cec5SDimitry Andric         pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
6410b57cec5SDimitry Andric       } else {
6420b57cec5SDimitry Andric         pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
6430b57cec5SDimitry Andric       }
6440b57cec5SDimitry Andric     }
6450b57cec5SDimitry Andric     if (pr->flags.ordered) {
6460b57cec5SDimitry Andric       pr->u.p.ordered_lower = init;
6470b57cec5SDimitry Andric       pr->u.p.ordered_upper = limit;
6480b57cec5SDimitry Andric     }
6490b57cec5SDimitry Andric     break;
6500b57cec5SDimitry Andric   } // case
6510b57cec5SDimitry Andric   case kmp_sch_static_balanced_chunked: {
6520b57cec5SDimitry Andric     // similar to balanced, but chunk adjusted to multiple of simd width
6530b57cec5SDimitry Andric     T nth = nproc;
6540b57cec5SDimitry Andric     KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
6550b57cec5SDimitry Andric                    " -> falling-through to static_greedy\n",
6560b57cec5SDimitry Andric                    gtid));
6570b57cec5SDimitry Andric     schedule = kmp_sch_static_greedy;
6580b57cec5SDimitry Andric     if (nth > 1)
6590b57cec5SDimitry Andric       pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
6600b57cec5SDimitry Andric     else
6610b57cec5SDimitry Andric       pr->u.p.parm1 = tc;
6620b57cec5SDimitry Andric     break;
6630b57cec5SDimitry Andric   } // case
6640b57cec5SDimitry Andric   case kmp_sch_guided_simd:
6650b57cec5SDimitry Andric   case kmp_sch_guided_iterative_chunked: {
6660b57cec5SDimitry Andric     KD_TRACE(
6670b57cec5SDimitry Andric         100,
6680b57cec5SDimitry Andric         ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
6690b57cec5SDimitry Andric          " case\n",
6700b57cec5SDimitry Andric          gtid));
6710b57cec5SDimitry Andric 
6720b57cec5SDimitry Andric     if (nproc > 1) {
6730b57cec5SDimitry Andric       if ((2L * chunk + 1) * nproc >= tc) {
6740b57cec5SDimitry Andric         /* chunk size too large, switch to dynamic */
6750b57cec5SDimitry Andric         schedule = kmp_sch_dynamic_chunked;
676fe6060f1SDimitry Andric         goto dynamic_init;
6770b57cec5SDimitry Andric       } else {
6780b57cec5SDimitry Andric         // when remaining iters become less than parm2 - switch to dynamic
6790b57cec5SDimitry Andric         pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
6800b57cec5SDimitry Andric         *(double *)&pr->u.p.parm3 =
681e8d8bef9SDimitry Andric             guided_flt_param / (double)nproc; // may occupy parm3 and parm4
6820b57cec5SDimitry Andric       }
6830b57cec5SDimitry Andric     } else {
6840b57cec5SDimitry Andric       KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
6850b57cec5SDimitry Andric                      "kmp_sch_static_greedy\n",
6860b57cec5SDimitry Andric                      gtid));
6870b57cec5SDimitry Andric       schedule = kmp_sch_static_greedy;
6880b57cec5SDimitry Andric       /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
6890b57cec5SDimitry Andric       KD_TRACE(
6900b57cec5SDimitry Andric           100,
6910b57cec5SDimitry Andric           ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
6920b57cec5SDimitry Andric            gtid));
6930b57cec5SDimitry Andric       pr->u.p.parm1 = tc;
6940b57cec5SDimitry Andric     } // if
6950b57cec5SDimitry Andric   } // case
6960b57cec5SDimitry Andric   break;
6970b57cec5SDimitry Andric   case kmp_sch_guided_analytical_chunked: {
6980b57cec5SDimitry Andric     KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
6990b57cec5SDimitry Andric                    "kmp_sch_guided_analytical_chunked case\n",
7000b57cec5SDimitry Andric                    gtid));
7010b57cec5SDimitry Andric 
7020b57cec5SDimitry Andric     if (nproc > 1) {
7030b57cec5SDimitry Andric       if ((2L * chunk + 1) * nproc >= tc) {
7040b57cec5SDimitry Andric         /* chunk size too large, switch to dynamic */
7050b57cec5SDimitry Andric         schedule = kmp_sch_dynamic_chunked;
706fe6060f1SDimitry Andric         goto dynamic_init;
7070b57cec5SDimitry Andric       } else {
7080b57cec5SDimitry Andric         /* commonly used term: (2 nproc - 1)/(2 nproc) */
7090b57cec5SDimitry Andric         DBL x;
7100b57cec5SDimitry Andric 
7110b57cec5SDimitry Andric #if KMP_USE_X87CONTROL
7120b57cec5SDimitry Andric         /* Linux* OS already has 64-bit computation by default for long double,
7130b57cec5SDimitry Andric            and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
7140b57cec5SDimitry Andric            Windows* OS on IA-32 architecture, we need to set precision to 64-bit
7150b57cec5SDimitry Andric            instead of the default 53-bit. Even though long double doesn't work
7160b57cec5SDimitry Andric            on Windows* OS on Intel(R) 64, the resulting lack of precision is not
7170b57cec5SDimitry Andric            expected to impact the correctness of the algorithm, but this has not
7180b57cec5SDimitry Andric            been mathematically proven. */
7190b57cec5SDimitry Andric         // save original FPCW and set precision to 64-bit, as
7200b57cec5SDimitry Andric         // Windows* OS on IA-32 architecture defaults to 53-bit
7210b57cec5SDimitry Andric         unsigned int oldFpcw = _control87(0, 0);
7220b57cec5SDimitry Andric         _control87(_PC_64, _MCW_PC); // 0,0x30000
7230b57cec5SDimitry Andric #endif
7240b57cec5SDimitry Andric         /* value used for comparison in solver for cross-over point */
725349cc55cSDimitry Andric         KMP_ASSERT(tc > 0);
7260b57cec5SDimitry Andric         long double target = ((long double)chunk * 2 + 1) * nproc / tc;
7270b57cec5SDimitry Andric 
7280b57cec5SDimitry Andric         /* crossover point--chunk indexes equal to or greater than
7290b57cec5SDimitry Andric            this point switch to dynamic-style scheduling */
7300b57cec5SDimitry Andric         UT cross;
7310b57cec5SDimitry Andric 
7320b57cec5SDimitry Andric         /* commonly used term: (2 nproc - 1)/(2 nproc) */
733e8d8bef9SDimitry Andric         x = 1.0 - 0.5 / (double)nproc;
7340b57cec5SDimitry Andric 
7350b57cec5SDimitry Andric #ifdef KMP_DEBUG
7360b57cec5SDimitry Andric         { // test natural alignment
7370b57cec5SDimitry Andric           struct _test_a {
7380b57cec5SDimitry Andric             char a;
7390b57cec5SDimitry Andric             union {
7400b57cec5SDimitry Andric               char b;
7410b57cec5SDimitry Andric               DBL d;
7420b57cec5SDimitry Andric             };
7430b57cec5SDimitry Andric           } t;
7440b57cec5SDimitry Andric           ptrdiff_t natural_alignment =
7450b57cec5SDimitry Andric               (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
7460b57cec5SDimitry Andric           //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long
7470b57cec5SDimitry Andric           // long)natural_alignment );
7480b57cec5SDimitry Andric           KMP_DEBUG_ASSERT(
7490b57cec5SDimitry Andric               (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
7500b57cec5SDimitry Andric         }
7510b57cec5SDimitry Andric #endif // KMP_DEBUG
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric         /* save the term in thread private dispatch structure */
7540b57cec5SDimitry Andric         *(DBL *)&pr->u.p.parm3 = x;
7550b57cec5SDimitry Andric 
7560b57cec5SDimitry Andric         /* solve for the crossover point to the nearest integer i for which C_i
7570b57cec5SDimitry Andric            <= chunk */
7580b57cec5SDimitry Andric         {
7590b57cec5SDimitry Andric           UT left, right, mid;
7600b57cec5SDimitry Andric           long double p;
7610b57cec5SDimitry Andric 
7620b57cec5SDimitry Andric           /* estimate initial upper and lower bound */
7630b57cec5SDimitry Andric 
7640b57cec5SDimitry Andric           /* doesn't matter what value right is as long as it is positive, but
7650b57cec5SDimitry Andric              it affects performance of the solver */
7660b57cec5SDimitry Andric           right = 229;
7670b57cec5SDimitry Andric           p = __kmp_pow<UT>(x, right);
7680b57cec5SDimitry Andric           if (p > target) {
7690b57cec5SDimitry Andric             do {
7700b57cec5SDimitry Andric               p *= p;
7710b57cec5SDimitry Andric               right <<= 1;
7720b57cec5SDimitry Andric             } while (p > target && right < (1 << 27));
7730b57cec5SDimitry Andric             /* lower bound is previous (failed) estimate of upper bound */
7740b57cec5SDimitry Andric             left = right >> 1;
7750b57cec5SDimitry Andric           } else {
7760b57cec5SDimitry Andric             left = 0;
7770b57cec5SDimitry Andric           }
7780b57cec5SDimitry Andric 
7790b57cec5SDimitry Andric           /* bisection root-finding method */
7800b57cec5SDimitry Andric           while (left + 1 < right) {
7810b57cec5SDimitry Andric             mid = (left + right) / 2;
7820b57cec5SDimitry Andric             if (__kmp_pow<UT>(x, mid) > target) {
7830b57cec5SDimitry Andric               left = mid;
7840b57cec5SDimitry Andric             } else {
7850b57cec5SDimitry Andric               right = mid;
7860b57cec5SDimitry Andric             }
7870b57cec5SDimitry Andric           } // while
7880b57cec5SDimitry Andric           cross = right;
7890b57cec5SDimitry Andric         }
7900b57cec5SDimitry Andric         /* assert sanity of computed crossover point */
7910b57cec5SDimitry Andric         KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
7920b57cec5SDimitry Andric                    __kmp_pow<UT>(x, cross) <= target);
7930b57cec5SDimitry Andric 
7940b57cec5SDimitry Andric         /* save the crossover point in thread private dispatch structure */
7950b57cec5SDimitry Andric         pr->u.p.parm2 = cross;
7960b57cec5SDimitry Andric 
7970b57cec5SDimitry Andric // C75803
7980b57cec5SDimitry Andric #if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
7990b57cec5SDimitry Andric #define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
8000b57cec5SDimitry Andric #else
8010b57cec5SDimitry Andric #define GUIDED_ANALYTICAL_WORKAROUND (x)
8020b57cec5SDimitry Andric #endif
8030b57cec5SDimitry Andric         /* dynamic-style scheduling offset */
804fe6060f1SDimitry Andric         pr->u.p.count = tc -
805fe6060f1SDimitry Andric                         __kmp_dispatch_guided_remaining(
8060b57cec5SDimitry Andric                             tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
8070b57cec5SDimitry Andric                         cross * chunk;
8080b57cec5SDimitry Andric #if KMP_USE_X87CONTROL
8090b57cec5SDimitry Andric         // restore FPCW
8100b57cec5SDimitry Andric         _control87(oldFpcw, _MCW_PC);
8110b57cec5SDimitry Andric #endif
8120b57cec5SDimitry Andric       } // if
8130b57cec5SDimitry Andric     } else {
8140b57cec5SDimitry Andric       KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
8150b57cec5SDimitry Andric                      "kmp_sch_static_greedy\n",
8160b57cec5SDimitry Andric                      gtid));
8170b57cec5SDimitry Andric       schedule = kmp_sch_static_greedy;
8180b57cec5SDimitry Andric       /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
8190b57cec5SDimitry Andric       pr->u.p.parm1 = tc;
8200b57cec5SDimitry Andric     } // if
8210b57cec5SDimitry Andric   } // case
8220b57cec5SDimitry Andric   break;
8230b57cec5SDimitry Andric   case kmp_sch_static_greedy:
8240b57cec5SDimitry Andric     KD_TRACE(
8250b57cec5SDimitry Andric         100,
8260b57cec5SDimitry Andric         ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
8270b57cec5SDimitry Andric          gtid));
8280b57cec5SDimitry Andric     pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
8290b57cec5SDimitry Andric     break;
8300b57cec5SDimitry Andric   case kmp_sch_static_chunked:
8310b57cec5SDimitry Andric   case kmp_sch_dynamic_chunked:
832fe6060f1SDimitry Andric   dynamic_init:
833349cc55cSDimitry Andric     if (tc == 0)
834349cc55cSDimitry Andric       break;
835fe6060f1SDimitry Andric     if (pr->u.p.parm1 <= 0)
8360b57cec5SDimitry Andric       pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
837fe6060f1SDimitry Andric     else if (pr->u.p.parm1 > tc)
838fe6060f1SDimitry Andric       pr->u.p.parm1 = tc;
839fe6060f1SDimitry Andric     // Store the total number of chunks to prevent integer overflow during
840fe6060f1SDimitry Andric     // bounds calculations in the get next chunk routine.
841fe6060f1SDimitry Andric     pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
8420b57cec5SDimitry Andric     KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
8430b57cec5SDimitry Andric                    "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
8440b57cec5SDimitry Andric                    gtid));
8450b57cec5SDimitry Andric     break;
8460b57cec5SDimitry Andric   case kmp_sch_trapezoidal: {
8470b57cec5SDimitry Andric     /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
8480b57cec5SDimitry Andric 
8490b57cec5SDimitry Andric     T parm1, parm2, parm3, parm4;
8500b57cec5SDimitry Andric     KD_TRACE(100,
8510b57cec5SDimitry Andric              ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
8520b57cec5SDimitry Andric               gtid));
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric     parm1 = chunk;
8550b57cec5SDimitry Andric 
8560b57cec5SDimitry Andric     /* F : size of the first cycle */
8570b57cec5SDimitry Andric     parm2 = (tc / (2 * nproc));
8580b57cec5SDimitry Andric 
8590b57cec5SDimitry Andric     if (parm2 < 1) {
8600b57cec5SDimitry Andric       parm2 = 1;
8610b57cec5SDimitry Andric     }
8620b57cec5SDimitry Andric 
8630b57cec5SDimitry Andric     /* L : size of the last cycle.  Make sure the last cycle is not larger
8640b57cec5SDimitry Andric        than the first cycle. */
8650b57cec5SDimitry Andric     if (parm1 < 1) {
8660b57cec5SDimitry Andric       parm1 = 1;
8670b57cec5SDimitry Andric     } else if (parm1 > parm2) {
8680b57cec5SDimitry Andric       parm1 = parm2;
8690b57cec5SDimitry Andric     }
8700b57cec5SDimitry Andric 
8710b57cec5SDimitry Andric     /* N : number of cycles */
8720b57cec5SDimitry Andric     parm3 = (parm2 + parm1);
8730b57cec5SDimitry Andric     parm3 = (2 * tc + parm3 - 1) / parm3;
8740b57cec5SDimitry Andric 
8750b57cec5SDimitry Andric     if (parm3 < 2) {
8760b57cec5SDimitry Andric       parm3 = 2;
8770b57cec5SDimitry Andric     }
8780b57cec5SDimitry Andric 
8790b57cec5SDimitry Andric     /* sigma : decreasing incr of the trapezoid */
8800b57cec5SDimitry Andric     parm4 = (parm3 - 1);
8810b57cec5SDimitry Andric     parm4 = (parm2 - parm1) / parm4;
8820b57cec5SDimitry Andric 
8830b57cec5SDimitry Andric     // pointless check, because parm4 >= 0 always
8840b57cec5SDimitry Andric     // if ( parm4 < 0 ) {
8850b57cec5SDimitry Andric     //    parm4 = 0;
8860b57cec5SDimitry Andric     //}
8870b57cec5SDimitry Andric 
8880b57cec5SDimitry Andric     pr->u.p.parm1 = parm1;
8890b57cec5SDimitry Andric     pr->u.p.parm2 = parm2;
8900b57cec5SDimitry Andric     pr->u.p.parm3 = parm3;
8910b57cec5SDimitry Andric     pr->u.p.parm4 = parm4;
8920b57cec5SDimitry Andric   } // case
8930b57cec5SDimitry Andric   break;
8940b57cec5SDimitry Andric 
8950b57cec5SDimitry Andric   default: {
8960b57cec5SDimitry Andric     __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
8970b57cec5SDimitry Andric                 KMP_HNT(GetNewerLibrary), // Hint
8980b57cec5SDimitry Andric                 __kmp_msg_null // Variadic argument list terminator
8990b57cec5SDimitry Andric     );
9000b57cec5SDimitry Andric   } break;
9010b57cec5SDimitry Andric   } // switch
9020b57cec5SDimitry Andric   pr->schedule = schedule;
9030b57cec5SDimitry Andric }
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
9060b57cec5SDimitry Andric template <typename T>
9070b57cec5SDimitry Andric inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
9080b57cec5SDimitry Andric                                              typename traits_t<T>::signed_t st);
9090b57cec5SDimitry Andric template <>
9100b57cec5SDimitry Andric inline void
9110b57cec5SDimitry Andric __kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
9120b57cec5SDimitry Andric                                             kmp_int32 ub, kmp_int32 st) {
9130b57cec5SDimitry Andric   __kmp_dispatch_init_hierarchy<kmp_int32>(
9140b57cec5SDimitry Andric       loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
9150b57cec5SDimitry Andric       __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
9160b57cec5SDimitry Andric }
9170b57cec5SDimitry Andric template <>
9180b57cec5SDimitry Andric inline void
9190b57cec5SDimitry Andric __kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
9200b57cec5SDimitry Andric                                              kmp_uint32 ub, kmp_int32 st) {
9210b57cec5SDimitry Andric   __kmp_dispatch_init_hierarchy<kmp_uint32>(
9220b57cec5SDimitry Andric       loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
9230b57cec5SDimitry Andric       __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
9240b57cec5SDimitry Andric }
9250b57cec5SDimitry Andric template <>
9260b57cec5SDimitry Andric inline void
9270b57cec5SDimitry Andric __kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
9280b57cec5SDimitry Andric                                             kmp_int64 ub, kmp_int64 st) {
9290b57cec5SDimitry Andric   __kmp_dispatch_init_hierarchy<kmp_int64>(
9300b57cec5SDimitry Andric       loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
9310b57cec5SDimitry Andric       __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
9320b57cec5SDimitry Andric }
9330b57cec5SDimitry Andric template <>
9340b57cec5SDimitry Andric inline void
9350b57cec5SDimitry Andric __kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
9360b57cec5SDimitry Andric                                              kmp_uint64 ub, kmp_int64 st) {
9370b57cec5SDimitry Andric   __kmp_dispatch_init_hierarchy<kmp_uint64>(
9380b57cec5SDimitry Andric       loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
9390b57cec5SDimitry Andric       __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
9400b57cec5SDimitry Andric }
9410b57cec5SDimitry Andric 
9420b57cec5SDimitry Andric // free all the hierarchy scheduling memory associated with the team
9430b57cec5SDimitry Andric void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
9440b57cec5SDimitry Andric   int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
9450b57cec5SDimitry Andric   for (int i = 0; i < num_disp_buff; ++i) {
9460b57cec5SDimitry Andric     // type does not matter here so use kmp_int32
9470b57cec5SDimitry Andric     auto sh =
9480b57cec5SDimitry Andric         reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
9490b57cec5SDimitry Andric             &team->t.t_disp_buffer[i]);
9500b57cec5SDimitry Andric     if (sh->hier) {
9510b57cec5SDimitry Andric       sh->hier->deallocate();
9520b57cec5SDimitry Andric       __kmp_free(sh->hier);
9530b57cec5SDimitry Andric     }
9540b57cec5SDimitry Andric   }
9550b57cec5SDimitry Andric }
9560b57cec5SDimitry Andric #endif
9570b57cec5SDimitry Andric 
9580b57cec5SDimitry Andric // UT - unsigned flavor of T, ST - signed flavor of T,
9590b57cec5SDimitry Andric // DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
9600b57cec5SDimitry Andric template <typename T>
9610b57cec5SDimitry Andric static void
9620b57cec5SDimitry Andric __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
9630b57cec5SDimitry Andric                     T ub, typename traits_t<T>::signed_t st,
9640b57cec5SDimitry Andric                     typename traits_t<T>::signed_t chunk, int push_ws) {
9650b57cec5SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
9660b57cec5SDimitry Andric 
9670b57cec5SDimitry Andric   int active;
9680b57cec5SDimitry Andric   kmp_info_t *th;
9690b57cec5SDimitry Andric   kmp_team_t *team;
9700b57cec5SDimitry Andric   kmp_uint32 my_buffer_index;
9710b57cec5SDimitry Andric   dispatch_private_info_template<T> *pr;
9720b57cec5SDimitry Andric   dispatch_shared_info_template<T> volatile *sh;
9730b57cec5SDimitry Andric 
9740b57cec5SDimitry Andric   KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
9750b57cec5SDimitry Andric                    sizeof(dispatch_private_info));
9760b57cec5SDimitry Andric   KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
9770b57cec5SDimitry Andric                    sizeof(dispatch_shared_info));
978e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
9790b57cec5SDimitry Andric 
9800b57cec5SDimitry Andric   if (!TCR_4(__kmp_init_parallel))
9810b57cec5SDimitry Andric     __kmp_parallel_initialize();
9820b57cec5SDimitry Andric 
9830b57cec5SDimitry Andric   __kmp_resume_if_soft_paused();
9840b57cec5SDimitry Andric 
9850b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS
9860b57cec5SDimitry Andric   SSC_MARK_DISPATCH_INIT();
9870b57cec5SDimitry Andric #endif
9880b57cec5SDimitry Andric #ifdef KMP_DEBUG
9890b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
9900b57cec5SDimitry Andric   {
9910b57cec5SDimitry Andric     char *buff;
9920b57cec5SDimitry Andric     // create format specifiers before the debug output
9930b57cec5SDimitry Andric     buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
9940b57cec5SDimitry Andric                             "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
9950b57cec5SDimitry Andric                             traits_t<ST>::spec, traits_t<T>::spec,
9960b57cec5SDimitry Andric                             traits_t<T>::spec, traits_t<ST>::spec);
9970b57cec5SDimitry Andric     KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
9980b57cec5SDimitry Andric     __kmp_str_free(&buff);
9990b57cec5SDimitry Andric   }
10000b57cec5SDimitry Andric #endif
10010b57cec5SDimitry Andric   /* setup data */
10020b57cec5SDimitry Andric   th = __kmp_threads[gtid];
10030b57cec5SDimitry Andric   team = th->th.th_team;
10040b57cec5SDimitry Andric   active = !team->t.t_serialized;
10050b57cec5SDimitry Andric   th->th.th_ident = loc;
10060b57cec5SDimitry Andric 
10070b57cec5SDimitry Andric   // Any half-decent optimizer will remove this test when the blocks are empty
10080b57cec5SDimitry Andric   // since the macros expand to nothing
10090b57cec5SDimitry Andric   // when statistics are disabled.
10100b57cec5SDimitry Andric   if (schedule == __kmp_static) {
10110b57cec5SDimitry Andric     KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
10120b57cec5SDimitry Andric   } else {
10130b57cec5SDimitry Andric     KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC);
10140b57cec5SDimitry Andric   }
10150b57cec5SDimitry Andric 
10160b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
10170b57cec5SDimitry Andric   // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable
10180b57cec5SDimitry Andric   // Hierarchical scheduling does not work with ordered, so if ordered is
10190b57cec5SDimitry Andric   // detected, then revert back to threaded scheduling.
10200b57cec5SDimitry Andric   bool ordered;
10210b57cec5SDimitry Andric   enum sched_type my_sched = schedule;
10220b57cec5SDimitry Andric   my_buffer_index = th->th.th_dispatch->th_disp_index;
10230b57cec5SDimitry Andric   pr = reinterpret_cast<dispatch_private_info_template<T> *>(
10240b57cec5SDimitry Andric       &th->th.th_dispatch
10250b57cec5SDimitry Andric            ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
10260b57cec5SDimitry Andric   my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
10270b57cec5SDimitry Andric   if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
10280b57cec5SDimitry Andric     my_sched =
10290b57cec5SDimitry Andric         (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
10300b57cec5SDimitry Andric   ordered = (kmp_ord_lower & my_sched);
10310b57cec5SDimitry Andric   if (pr->flags.use_hier) {
10320b57cec5SDimitry Andric     if (ordered) {
10330b57cec5SDimitry Andric       KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected.  "
10340b57cec5SDimitry Andric                      "Disabling hierarchical scheduling.\n",
10350b57cec5SDimitry Andric                      gtid));
10360b57cec5SDimitry Andric       pr->flags.use_hier = FALSE;
10370b57cec5SDimitry Andric     }
10380b57cec5SDimitry Andric   }
10390b57cec5SDimitry Andric   if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
10400b57cec5SDimitry Andric     // Don't use hierarchical for ordered parallel loops and don't
10410b57cec5SDimitry Andric     // use the runtime hierarchy if one was specified in the program
10420b57cec5SDimitry Andric     if (!ordered && !pr->flags.use_hier)
10430b57cec5SDimitry Andric       __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
10440b57cec5SDimitry Andric   }
10450b57cec5SDimitry Andric #endif // KMP_USE_HIER_SCHED
10460b57cec5SDimitry Andric 
10470b57cec5SDimitry Andric #if USE_ITT_BUILD
10480b57cec5SDimitry Andric   kmp_uint64 cur_chunk = chunk;
10490b57cec5SDimitry Andric   int itt_need_metadata_reporting =
10500b57cec5SDimitry Andric       __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
10510b57cec5SDimitry Andric       KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
10520b57cec5SDimitry Andric       team->t.t_active_level == 1;
10530b57cec5SDimitry Andric #endif
10540b57cec5SDimitry Andric   if (!active) {
10550b57cec5SDimitry Andric     pr = reinterpret_cast<dispatch_private_info_template<T> *>(
10560b57cec5SDimitry Andric         th->th.th_dispatch->th_disp_buffer); /* top of the stack */
10570b57cec5SDimitry Andric   } else {
10580b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(th->th.th_dispatch ==
10590b57cec5SDimitry Andric                      &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
10600b57cec5SDimitry Andric 
10610b57cec5SDimitry Andric     my_buffer_index = th->th.th_dispatch->th_disp_index++;
10620b57cec5SDimitry Andric 
10630b57cec5SDimitry Andric     /* What happens when number of threads changes, need to resize buffer? */
10640b57cec5SDimitry Andric     pr = reinterpret_cast<dispatch_private_info_template<T> *>(
10650b57cec5SDimitry Andric         &th->th.th_dispatch
10660b57cec5SDimitry Andric              ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
10670b57cec5SDimitry Andric     sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
10680b57cec5SDimitry Andric         &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
10690b57cec5SDimitry Andric     KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
10700b57cec5SDimitry Andric                   my_buffer_index));
1071fe6060f1SDimitry Andric     if (sh->buffer_index != my_buffer_index) { // too many loops in progress?
1072fe6060f1SDimitry Andric       KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
1073fe6060f1SDimitry Andric                      " sh->buffer_index:%d\n",
1074fe6060f1SDimitry Andric                      gtid, my_buffer_index, sh->buffer_index));
1075fe6060f1SDimitry Andric       __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
1076fe6060f1SDimitry Andric                              __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
1077fe6060f1SDimitry Andric       // Note: KMP_WAIT() cannot be used there: buffer index and
1078fe6060f1SDimitry Andric       // my_buffer_index are *always* 32-bit integers.
1079fe6060f1SDimitry Andric       KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
1080fe6060f1SDimitry Andric                      "sh->buffer_index:%d\n",
1081fe6060f1SDimitry Andric                      gtid, my_buffer_index, sh->buffer_index));
1082fe6060f1SDimitry Andric     }
10830b57cec5SDimitry Andric   }
10840b57cec5SDimitry Andric 
10850b57cec5SDimitry Andric   __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
10860b57cec5SDimitry Andric #if USE_ITT_BUILD
10870b57cec5SDimitry Andric                                 &cur_chunk,
10880b57cec5SDimitry Andric #endif
10890b57cec5SDimitry Andric                                 chunk, (T)th->th.th_team_nproc,
10900b57cec5SDimitry Andric                                 (T)th->th.th_info.ds.ds_tid);
10910b57cec5SDimitry Andric   if (active) {
10920b57cec5SDimitry Andric     if (pr->flags.ordered == 0) {
10930b57cec5SDimitry Andric       th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
10940b57cec5SDimitry Andric       th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
10950b57cec5SDimitry Andric     } else {
10960b57cec5SDimitry Andric       th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
10970b57cec5SDimitry Andric       th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
10980b57cec5SDimitry Andric     }
10990b57cec5SDimitry Andric     th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
11000b57cec5SDimitry Andric     th->th.th_dispatch->th_dispatch_sh_current =
11010b57cec5SDimitry Andric         CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
11020b57cec5SDimitry Andric #if USE_ITT_BUILD
11030b57cec5SDimitry Andric     if (pr->flags.ordered) {
11040b57cec5SDimitry Andric       __kmp_itt_ordered_init(gtid);
11050b57cec5SDimitry Andric     }
11060b57cec5SDimitry Andric     // Report loop metadata
11070b57cec5SDimitry Andric     if (itt_need_metadata_reporting) {
1108fe6060f1SDimitry Andric       // Only report metadata by primary thread of active team at level 1
11090b57cec5SDimitry Andric       kmp_uint64 schedtype = 0;
11100b57cec5SDimitry Andric       switch (schedule) {
11110b57cec5SDimitry Andric       case kmp_sch_static_chunked:
11120b57cec5SDimitry Andric       case kmp_sch_static_balanced: // Chunk is calculated in the switch above
11130b57cec5SDimitry Andric         break;
11140b57cec5SDimitry Andric       case kmp_sch_static_greedy:
11150b57cec5SDimitry Andric         cur_chunk = pr->u.p.parm1;
11160b57cec5SDimitry Andric         break;
11170b57cec5SDimitry Andric       case kmp_sch_dynamic_chunked:
11180b57cec5SDimitry Andric         schedtype = 1;
11190b57cec5SDimitry Andric         break;
11200b57cec5SDimitry Andric       case kmp_sch_guided_iterative_chunked:
11210b57cec5SDimitry Andric       case kmp_sch_guided_analytical_chunked:
11220b57cec5SDimitry Andric       case kmp_sch_guided_simd:
11230b57cec5SDimitry Andric         schedtype = 2;
11240b57cec5SDimitry Andric         break;
11250b57cec5SDimitry Andric       default:
11260b57cec5SDimitry Andric         // Should we put this case under "static"?
11270b57cec5SDimitry Andric         // case kmp_sch_static_steal:
11280b57cec5SDimitry Andric         schedtype = 3;
11290b57cec5SDimitry Andric         break;
11300b57cec5SDimitry Andric       }
11310b57cec5SDimitry Andric       __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
11320b57cec5SDimitry Andric     }
11330b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
11340b57cec5SDimitry Andric     if (pr->flags.use_hier) {
11350b57cec5SDimitry Andric       pr->u.p.count = 0;
11360b57cec5SDimitry Andric       pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
11370b57cec5SDimitry Andric     }
#endif // KMP_USE_HIER_SCHED
11390b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
11400b57cec5SDimitry Andric   }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric #ifdef KMP_DEBUG
11430b57cec5SDimitry Andric   {
11440b57cec5SDimitry Andric     char *buff;
11450b57cec5SDimitry Andric     // create format specifiers before the debug output
11460b57cec5SDimitry Andric     buff = __kmp_str_format(
11470b57cec5SDimitry Andric         "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
11480b57cec5SDimitry Andric         "lb:%%%s ub:%%%s"
11490b57cec5SDimitry Andric         " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
11500b57cec5SDimitry Andric         " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
11510b57cec5SDimitry Andric         traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
11520b57cec5SDimitry Andric         traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
11530b57cec5SDimitry Andric         traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
11540b57cec5SDimitry Andric         traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
11550b57cec5SDimitry Andric     KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
11560b57cec5SDimitry Andric                   pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
11570b57cec5SDimitry Andric                   pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
11580b57cec5SDimitry Andric                   pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
11590b57cec5SDimitry Andric     __kmp_str_free(&buff);
11600b57cec5SDimitry Andric   }
11610b57cec5SDimitry Andric #endif
11620b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
11630b57cec5SDimitry Andric   if (ompt_enabled.ompt_callback_work) {
11640b57cec5SDimitry Andric     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
11650b57cec5SDimitry Andric     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
11660b57cec5SDimitry Andric     ompt_callbacks.ompt_callback(ompt_callback_work)(
1167*0fca6ea1SDimitry Andric         ompt_get_work_schedule(pr->schedule), ompt_scope_begin,
1168*0fca6ea1SDimitry Andric         &(team_info->parallel_data), &(task_info->task_data), pr->u.p.tc,
1169*0fca6ea1SDimitry Andric         OMPT_LOAD_RETURN_ADDRESS(gtid));
11700b57cec5SDimitry Andric   }
11710b57cec5SDimitry Andric #endif
11720b57cec5SDimitry Andric   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
11730b57cec5SDimitry Andric }
11740b57cec5SDimitry Andric 
11750b57cec5SDimitry Andric /* For ordered loops, either __kmp_dispatch_finish() should be called after
11760b57cec5SDimitry Andric  * every iteration, or __kmp_dispatch_finish_chunk() should be called after
11770b57cec5SDimitry Andric  * every chunk of iterations.  If the ordered section(s) were not executed
11780b57cec5SDimitry Andric  * for this iteration (or every iteration in this chunk), we need to set the
11790b57cec5SDimitry Andric  * ordered iteration counters so that the next thread can proceed. */
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  // A serialized team executes iterations in order by construction, so there
  // is no shared ordering state to update.
  if (!th->th.th_team->t.t_serialized) {

    // Private and team-shared dispatch records for the loop currently being
    // dispatched (installed by __kmp_dispatch_init).
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      // The ordered section for this iteration already advanced the shared
      // counter; just clear the flag for the next iteration.
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      // The ordered section was not executed for this iteration, so this
      // thread must advance the shared counter itself to unblock the thread
      // waiting on the next ordered iteration.
      UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      // Spin until it is this iteration's turn, i.e. the shared counter has
      // reached this thread's ordered lower bound.
      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
      KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      // Atomically bump the shared counter by one, releasing the next thread.
      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}
12410b57cec5SDimitry Andric 
12420b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT
12430b57cec5SDimitry Andric 
template <typename UT>
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  // A serialized team needs no ordered bookkeeping.
  if (!th->th.th_team->t.t_serialized) {
    // Private and team-shared dispatch records for the loop currently being
    // dispatched (installed by __kmp_dispatch_init).
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    // inc = number of iterations in this chunk ([lower, upper] inclusive).
    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1;

    if (pr->ordered_bumped == inc) {
      // Every iteration in the chunk executed its ordered section, so the
      // shared counter is already fully advanced; just reset the flag.
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      // Some iterations skipped their ordered sections: advance the shared
      // counter by the remaining (un-bumped) iteration count ourselves.
      inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      // Spin until it is this chunk's turn (shared counter >= lower).
      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KMP_MB(); /* is this necessary? */
      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;
// TODO: check whether inc should be unsigned or signed
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      // Atomically add the remaining count, releasing threads waiting on any
      // iteration up through this chunk's upper bound.
      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
    //        }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}
13190b57cec5SDimitry Andric 
13200b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */
13210b57cec5SDimitry Andric 
13220b57cec5SDimitry Andric template <typename T>
13230b57cec5SDimitry Andric int __kmp_dispatch_next_algorithm(int gtid,
13240b57cec5SDimitry Andric                                   dispatch_private_info_template<T> *pr,
13250b57cec5SDimitry Andric                                   dispatch_shared_info_template<T> volatile *sh,
13260b57cec5SDimitry Andric                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
13270b57cec5SDimitry Andric                                   typename traits_t<T>::signed_t *p_st, T nproc,
13280b57cec5SDimitry Andric                                   T tid) {
13290b57cec5SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
13300b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
13310b57cec5SDimitry Andric   typedef typename traits_t<T>::floating_t DBL;
13320b57cec5SDimitry Andric   int status = 0;
1333e8d8bef9SDimitry Andric   bool last = false;
13340b57cec5SDimitry Andric   T start;
13350b57cec5SDimitry Andric   ST incr;
13360b57cec5SDimitry Andric   UT limit, trip, init;
13370b57cec5SDimitry Andric   kmp_info_t *th = __kmp_threads[gtid];
13380b57cec5SDimitry Andric   kmp_team_t *team = th->th.th_team;
13390b57cec5SDimitry Andric 
13400b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th->th.th_dispatch ==
13410b57cec5SDimitry Andric                    &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
13420b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(pr);
13430b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(sh);
13440b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc);
13450b57cec5SDimitry Andric #ifdef KMP_DEBUG
13460b57cec5SDimitry Andric   {
13470b57cec5SDimitry Andric     char *buff;
13480b57cec5SDimitry Andric     // create format specifiers before the debug output
13490b57cec5SDimitry Andric     buff =
13500b57cec5SDimitry Andric         __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
13510b57cec5SDimitry Andric                          "sh:%%p nproc:%%%s tid:%%%s\n",
13520b57cec5SDimitry Andric                          traits_t<T>::spec, traits_t<T>::spec);
13530b57cec5SDimitry Andric     KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
13540b57cec5SDimitry Andric     __kmp_str_free(&buff);
13550b57cec5SDimitry Andric   }
13560b57cec5SDimitry Andric #endif
13570b57cec5SDimitry Andric 
13580b57cec5SDimitry Andric   // zero trip count
13590b57cec5SDimitry Andric   if (pr->u.p.tc == 0) {
13600b57cec5SDimitry Andric     KD_TRACE(10,
13610b57cec5SDimitry Andric              ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
13620b57cec5SDimitry Andric               "zero status:%d\n",
13630b57cec5SDimitry Andric               gtid, status));
13640b57cec5SDimitry Andric     return 0;
13650b57cec5SDimitry Andric   }
13660b57cec5SDimitry Andric 
13670b57cec5SDimitry Andric   switch (pr->schedule) {
1368fe6060f1SDimitry Andric #if KMP_STATIC_STEAL_ENABLED
13690b57cec5SDimitry Andric   case kmp_sch_static_steal: {
13700b57cec5SDimitry Andric     T chunk = pr->u.p.parm1;
1371fe6060f1SDimitry Andric     UT nchunks = pr->u.p.parm2;
13720b57cec5SDimitry Andric     KD_TRACE(100,
13730b57cec5SDimitry Andric              ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
13740b57cec5SDimitry Andric               gtid));
13750b57cec5SDimitry Andric 
13760b57cec5SDimitry Andric     trip = pr->u.p.tc - 1;
13770b57cec5SDimitry Andric 
13780b57cec5SDimitry Andric     if (traits_t<T>::type_size > 4) {
1379fe6060f1SDimitry Andric       // use lock for 8-byte induction variable.
1380fe6060f1SDimitry Andric       // TODO (optional): check presence and use 16-byte CAS
1381fe6060f1SDimitry Andric       kmp_lock_t *lck = pr->u.p.steal_lock;
13820b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(lck != NULL);
13830b57cec5SDimitry Andric       if (pr->u.p.count < (UT)pr->u.p.ub) {
1384fe6060f1SDimitry Andric         KMP_DEBUG_ASSERT(pr->steal_flag == READY);
13850b57cec5SDimitry Andric         __kmp_acquire_lock(lck, gtid);
13860b57cec5SDimitry Andric         // try to get own chunk of iterations
13870b57cec5SDimitry Andric         init = (pr->u.p.count)++;
13880b57cec5SDimitry Andric         status = (init < (UT)pr->u.p.ub);
13890b57cec5SDimitry Andric         __kmp_release_lock(lck, gtid);
13900b57cec5SDimitry Andric       } else {
13910b57cec5SDimitry Andric         status = 0; // no own chunks
13920b57cec5SDimitry Andric       }
13930b57cec5SDimitry Andric       if (!status) { // try to steal
1394fe6060f1SDimitry Andric         kmp_lock_t *lckv; // victim buffer's lock
1395e8d8bef9SDimitry Andric         T while_limit = pr->u.p.parm3;
1396e8d8bef9SDimitry Andric         T while_index = 0;
13975ffd83dbSDimitry Andric         int idx = (th->th.th_dispatch->th_disp_index - 1) %
13985ffd83dbSDimitry Andric                   __kmp_dispatch_num_buffers; // current loop index
13995ffd83dbSDimitry Andric         // note: victim thread can potentially execute another loop
1400fe6060f1SDimitry Andric         KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF); // mark self buffer inactive
14010b57cec5SDimitry Andric         while ((!status) && (while_limit != ++while_index)) {
1402fe6060f1SDimitry Andric           dispatch_private_info_template<T> *v;
14030b57cec5SDimitry Andric           T remaining;
1404fe6060f1SDimitry Andric           T victimId = pr->u.p.parm4;
1405fe6060f1SDimitry Andric           T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1406fe6060f1SDimitry Andric           v = reinterpret_cast<dispatch_private_info_template<T> *>(
1407fe6060f1SDimitry Andric               &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1408fe6060f1SDimitry Andric           KMP_DEBUG_ASSERT(v);
1409fe6060f1SDimitry Andric           while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
1410fe6060f1SDimitry Andric                  oldVictimId != victimId) {
1411fe6060f1SDimitry Andric             victimId = (victimId + 1) % nproc;
1412fe6060f1SDimitry Andric             v = reinterpret_cast<dispatch_private_info_template<T> *>(
1413fe6060f1SDimitry Andric                 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1414fe6060f1SDimitry Andric             KMP_DEBUG_ASSERT(v);
14150b57cec5SDimitry Andric           }
1416fe6060f1SDimitry Andric           if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
14170b57cec5SDimitry Andric             continue; // try once more (nproc attempts in total)
14180b57cec5SDimitry Andric           }
1419fe6060f1SDimitry Andric           if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
1420fe6060f1SDimitry Andric             kmp_uint32 old = UNUSED;
1421fe6060f1SDimitry Andric             // try to steal whole range from inactive victim
1422fe6060f1SDimitry Andric             status = v->steal_flag.compare_exchange_strong(old, THIEF);
1423fe6060f1SDimitry Andric             if (status) {
1424fe6060f1SDimitry Andric               // initialize self buffer with victim's whole range of chunks
1425fe6060f1SDimitry Andric               T id = victimId;
14265f757f3fSDimitry Andric               T small_chunk = 0, extras = 0, p_extra = 0;
14275f757f3fSDimitry Andric               __kmp_initialize_self_buffer<T>(team, id, pr, nchunks, nproc,
14285f757f3fSDimitry Andric                                               init, small_chunk, extras,
14295f757f3fSDimitry Andric                                               p_extra);
1430fe6060f1SDimitry Andric               __kmp_acquire_lock(lck, gtid);
1431fe6060f1SDimitry Andric               pr->u.p.count = init + 1; // exclude one we execute immediately
14325f757f3fSDimitry Andric               pr->u.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0);
1433fe6060f1SDimitry Andric               __kmp_release_lock(lck, gtid);
1434fe6060f1SDimitry Andric               pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
1435fe6060f1SDimitry Andric               // no need to reinitialize other thread invariants: lb, st, etc.
1436fe6060f1SDimitry Andric #ifdef KMP_DEBUG
1437fe6060f1SDimitry Andric               {
1438fe6060f1SDimitry Andric                 char *buff;
1439fe6060f1SDimitry Andric                 // create format specifiers before the debug output
14405f757f3fSDimitry Andric                 buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
14415f757f3fSDimitry Andric                                         "stolen chunks from T#%%d, "
1442fe6060f1SDimitry Andric                                         "count:%%%s ub:%%%s\n",
1443fe6060f1SDimitry Andric                                         traits_t<UT>::spec, traits_t<T>::spec);
1444fe6060f1SDimitry Andric                 KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub));
1445fe6060f1SDimitry Andric                 __kmp_str_free(&buff);
1446fe6060f1SDimitry Andric               }
1447fe6060f1SDimitry Andric #endif
1448fe6060f1SDimitry Andric               // activate non-empty buffer and let others steal from us
1449fe6060f1SDimitry Andric               if (pr->u.p.count < (UT)pr->u.p.ub)
1450fe6060f1SDimitry Andric                 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1451fe6060f1SDimitry Andric               break;
1452fe6060f1SDimitry Andric             }
1453fe6060f1SDimitry Andric           }
1454bdd1243dSDimitry Andric           if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY ||
1455fe6060f1SDimitry Andric               v->u.p.count >= (UT)v->u.p.ub) {
1456fe6060f1SDimitry Andric             pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid
1457fe6060f1SDimitry Andric             continue; // no chunks to steal, try next victim
1458fe6060f1SDimitry Andric           }
1459fe6060f1SDimitry Andric           lckv = v->u.p.steal_lock;
1460fe6060f1SDimitry Andric           KMP_ASSERT(lckv != NULL);
1461fe6060f1SDimitry Andric           __kmp_acquire_lock(lckv, gtid);
1462fe6060f1SDimitry Andric           limit = v->u.p.ub; // keep initial ub
1463fe6060f1SDimitry Andric           if (v->u.p.count >= limit) {
1464fe6060f1SDimitry Andric             __kmp_release_lock(lckv, gtid);
1465fe6060f1SDimitry Andric             pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid
1466fe6060f1SDimitry Andric             continue; // no chunks to steal, try next victim
14670b57cec5SDimitry Andric           }
14680b57cec5SDimitry Andric 
          // stealing succeeded, reduce victim's ub by 1/4 of undone chunks
1470fe6060f1SDimitry Andric           // TODO: is this heuristics good enough??
1471fe6060f1SDimitry Andric           remaining = limit - v->u.p.count;
1472fe6060f1SDimitry Andric           if (remaining > 7) {
14730b57cec5SDimitry Andric             // steal 1/4 of remaining
14740b57cec5SDimitry Andric             KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
1475fe6060f1SDimitry Andric             init = (v->u.p.ub -= (remaining >> 2));
14760b57cec5SDimitry Andric           } else {
1477fe6060f1SDimitry Andric             // steal 1 chunk of 1..7 remaining
14780b57cec5SDimitry Andric             KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
1479fe6060f1SDimitry Andric             init = (v->u.p.ub -= 1);
14800b57cec5SDimitry Andric           }
1481fe6060f1SDimitry Andric           __kmp_release_lock(lckv, gtid);
1482fe6060f1SDimitry Andric #ifdef KMP_DEBUG
1483fe6060f1SDimitry Andric           {
1484fe6060f1SDimitry Andric             char *buff;
1485fe6060f1SDimitry Andric             // create format specifiers before the debug output
1486fe6060f1SDimitry Andric             buff = __kmp_str_format(
1487fe6060f1SDimitry Andric                 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1488fe6060f1SDimitry Andric                 "count:%%%s ub:%%%s\n",
1489fe6060f1SDimitry Andric                 traits_t<UT>::spec, traits_t<UT>::spec);
1490fe6060f1SDimitry Andric             KD_TRACE(10, (buff, gtid, victimId, init, limit));
1491fe6060f1SDimitry Andric             __kmp_str_free(&buff);
1492fe6060f1SDimitry Andric           }
1493fe6060f1SDimitry Andric #endif
14940b57cec5SDimitry Andric           KMP_DEBUG_ASSERT(init + 1 <= limit);
1495fe6060f1SDimitry Andric           pr->u.p.parm4 = victimId; // remember victim to steal from
14960b57cec5SDimitry Andric           status = 1;
1497fe6060f1SDimitry Andric           // now update own count and ub with stolen range excluding init chunk
1498fe6060f1SDimitry Andric           __kmp_acquire_lock(lck, gtid);
14990b57cec5SDimitry Andric           pr->u.p.count = init + 1;
15000b57cec5SDimitry Andric           pr->u.p.ub = limit;
1501fe6060f1SDimitry Andric           __kmp_release_lock(lck, gtid);
1502fe6060f1SDimitry Andric           // activate non-empty buffer and let others steal from us
1503fe6060f1SDimitry Andric           if (init + 1 < limit)
1504fe6060f1SDimitry Andric             KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
15050b57cec5SDimitry Andric         } // while (search for victim)
15060b57cec5SDimitry Andric       } // if (try to find victim and steal)
15070b57cec5SDimitry Andric     } else {
15080b57cec5SDimitry Andric       // 4-byte induction variable, use 8-byte CAS for pair (count, ub)
1509fe6060f1SDimitry Andric       // as all operations on pair (count, ub) must be done atomically
15100b57cec5SDimitry Andric       typedef union {
15110b57cec5SDimitry Andric         struct {
15120b57cec5SDimitry Andric           UT count;
15130b57cec5SDimitry Andric           T ub;
15140b57cec5SDimitry Andric         } p;
15150b57cec5SDimitry Andric         kmp_int64 b;
15160b57cec5SDimitry Andric       } union_i4;
15170b57cec5SDimitry Andric       union_i4 vold, vnew;
1518fe6060f1SDimitry Andric       if (pr->u.p.count < (UT)pr->u.p.ub) {
1519fe6060f1SDimitry Andric         KMP_DEBUG_ASSERT(pr->steal_flag == READY);
15200b57cec5SDimitry Andric         vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1521fe6060f1SDimitry Andric         vnew.b = vold.b;
1522fe6060f1SDimitry Andric         vnew.p.count++; // get chunk from head of self range
1523fe6060f1SDimitry Andric         while (!KMP_COMPARE_AND_STORE_REL64(
15240b57cec5SDimitry Andric             (volatile kmp_int64 *)&pr->u.p.count,
15250b57cec5SDimitry Andric             *VOLATILE_CAST(kmp_int64 *) & vold.b,
15260b57cec5SDimitry Andric             *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
15270b57cec5SDimitry Andric           KMP_CPU_PAUSE();
15280b57cec5SDimitry Andric           vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1529fe6060f1SDimitry Andric           vnew.b = vold.b;
15300b57cec5SDimitry Andric           vnew.p.count++;
15310b57cec5SDimitry Andric         }
1532fe6060f1SDimitry Andric         init = vold.p.count;
1533fe6060f1SDimitry Andric         status = (init < (UT)vold.p.ub);
1534fe6060f1SDimitry Andric       } else {
1535fe6060f1SDimitry Andric         status = 0; // no own chunks
15360b57cec5SDimitry Andric       }
1537fe6060f1SDimitry Andric       if (!status) { // try to steal
1538e8d8bef9SDimitry Andric         T while_limit = pr->u.p.parm3;
1539e8d8bef9SDimitry Andric         T while_index = 0;
15405ffd83dbSDimitry Andric         int idx = (th->th.th_dispatch->th_disp_index - 1) %
15415ffd83dbSDimitry Andric                   __kmp_dispatch_num_buffers; // current loop index
15425ffd83dbSDimitry Andric         // note: victim thread can potentially execute another loop
1543fe6060f1SDimitry Andric         KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF); // mark self buffer inactive
15440b57cec5SDimitry Andric         while ((!status) && (while_limit != ++while_index)) {
1545fe6060f1SDimitry Andric           dispatch_private_info_template<T> *v;
1546e8d8bef9SDimitry Andric           T remaining;
1547fe6060f1SDimitry Andric           T victimId = pr->u.p.parm4;
1548fe6060f1SDimitry Andric           T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1549fe6060f1SDimitry Andric           v = reinterpret_cast<dispatch_private_info_template<T> *>(
1550fe6060f1SDimitry Andric               &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1551fe6060f1SDimitry Andric           KMP_DEBUG_ASSERT(v);
1552fe6060f1SDimitry Andric           while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
1553fe6060f1SDimitry Andric                  oldVictimId != victimId) {
1554fe6060f1SDimitry Andric             victimId = (victimId + 1) % nproc;
1555fe6060f1SDimitry Andric             v = reinterpret_cast<dispatch_private_info_template<T> *>(
1556fe6060f1SDimitry Andric                 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1557fe6060f1SDimitry Andric             KMP_DEBUG_ASSERT(v);
15580b57cec5SDimitry Andric           }
1559fe6060f1SDimitry Andric           if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
15600b57cec5SDimitry Andric             continue; // try once more (nproc attempts in total)
15610b57cec5SDimitry Andric           }
1562fe6060f1SDimitry Andric           if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
1563fe6060f1SDimitry Andric             kmp_uint32 old = UNUSED;
1564fe6060f1SDimitry Andric             // try to steal whole range from inactive victim
1565fe6060f1SDimitry Andric             status = v->steal_flag.compare_exchange_strong(old, THIEF);
1566fe6060f1SDimitry Andric             if (status) {
1567fe6060f1SDimitry Andric               // initialize self buffer with victim's whole range of chunks
1568fe6060f1SDimitry Andric               T id = victimId;
15695f757f3fSDimitry Andric               T small_chunk = 0, extras = 0, p_extra = 0;
15705f757f3fSDimitry Andric               __kmp_initialize_self_buffer<T>(team, id, pr, nchunks, nproc,
15715f757f3fSDimitry Andric                                               init, small_chunk, extras,
15725f757f3fSDimitry Andric                                               p_extra);
1573fe6060f1SDimitry Andric               vnew.p.count = init + 1;
15745f757f3fSDimitry Andric               vnew.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0);
1575fe6060f1SDimitry Andric               // write pair (count, ub) at once atomically
1576fe6060f1SDimitry Andric #if KMP_ARCH_X86
1577fe6060f1SDimitry Andric               KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vnew.b);
1578fe6060f1SDimitry Andric #else
1579fe6060f1SDimitry Andric               *(volatile kmp_int64 *)(&pr->u.p.count) = vnew.b;
1580fe6060f1SDimitry Andric #endif
1581fe6060f1SDimitry Andric               pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
1582fe6060f1SDimitry Andric               // no need to initialize other thread invariants: lb, st, etc.
1583fe6060f1SDimitry Andric #ifdef KMP_DEBUG
1584fe6060f1SDimitry Andric               {
1585fe6060f1SDimitry Andric                 char *buff;
1586fe6060f1SDimitry Andric                 // create format specifiers before the debug output
15875f757f3fSDimitry Andric                 buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
15885f757f3fSDimitry Andric                                         "stolen chunks from T#%%d, "
1589fe6060f1SDimitry Andric                                         "count:%%%s ub:%%%s\n",
1590fe6060f1SDimitry Andric                                         traits_t<UT>::spec, traits_t<T>::spec);
1591fe6060f1SDimitry Andric                 KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub));
1592fe6060f1SDimitry Andric                 __kmp_str_free(&buff);
15930b57cec5SDimitry Andric               }
1594fe6060f1SDimitry Andric #endif
1595fe6060f1SDimitry Andric               // activate non-empty buffer and let others steal from us
1596fe6060f1SDimitry Andric               if (pr->u.p.count < (UT)pr->u.p.ub)
1597fe6060f1SDimitry Andric                 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1598fe6060f1SDimitry Andric               break;
1599fe6060f1SDimitry Andric             }
1600fe6060f1SDimitry Andric           }
1601fe6060f1SDimitry Andric           while (1) { // CAS loop with check if victim still has enough chunks
1602fe6060f1SDimitry Andric             // many threads may be stealing concurrently from same victim
1603fe6060f1SDimitry Andric             vold.b = *(volatile kmp_int64 *)(&v->u.p.count);
1604fe6060f1SDimitry Andric             if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY ||
1605fe6060f1SDimitry Andric                 vold.p.count >= (UT)vold.p.ub) {
1606fe6060f1SDimitry Andric               pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim id
1607fe6060f1SDimitry Andric               break; // no chunks to steal, try next victim
1608fe6060f1SDimitry Andric             }
1609fe6060f1SDimitry Andric             vnew.b = vold.b;
1610fe6060f1SDimitry Andric             remaining = vold.p.ub - vold.p.count;
1611e8d8bef9SDimitry Andric             // try to steal 1/4 of remaining
            // TODO: is this heuristic good enough?
1613fe6060f1SDimitry Andric             if (remaining > 7) {
1614fe6060f1SDimitry Andric               vnew.p.ub -= remaining >> 2; // steal from tail of victim's range
16150b57cec5SDimitry Andric             } else {
1616fe6060f1SDimitry Andric               vnew.p.ub -= 1; // steal 1 chunk of 1..7 remaining
16170b57cec5SDimitry Andric             }
1618fe6060f1SDimitry Andric             KMP_DEBUG_ASSERT(vnew.p.ub * (UT)chunk <= trip);
1619fe6060f1SDimitry Andric             if (KMP_COMPARE_AND_STORE_REL64(
1620fe6060f1SDimitry Andric                     (volatile kmp_int64 *)&v->u.p.count,
16210b57cec5SDimitry Andric                     *VOLATILE_CAST(kmp_int64 *) & vold.b,
16220b57cec5SDimitry Andric                     *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
              // stealing succeeded
1624fe6060f1SDimitry Andric #ifdef KMP_DEBUG
1625fe6060f1SDimitry Andric               {
1626fe6060f1SDimitry Andric                 char *buff;
1627fe6060f1SDimitry Andric                 // create format specifiers before the debug output
1628fe6060f1SDimitry Andric                 buff = __kmp_str_format(
1629fe6060f1SDimitry Andric                     "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1630fe6060f1SDimitry Andric                     "count:%%%s ub:%%%s\n",
1631fe6060f1SDimitry Andric                     traits_t<T>::spec, traits_t<T>::spec);
1632fe6060f1SDimitry Andric                 KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub));
1633fe6060f1SDimitry Andric                 __kmp_str_free(&buff);
1634fe6060f1SDimitry Andric               }
1635fe6060f1SDimitry Andric #endif
16360b57cec5SDimitry Andric               KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
16370b57cec5SDimitry Andric                                         vold.p.ub - vnew.p.ub);
16380b57cec5SDimitry Andric               status = 1;
1639fe6060f1SDimitry Andric               pr->u.p.parm4 = victimId; // keep victim id
16400b57cec5SDimitry Andric               // now update own count and ub
16410b57cec5SDimitry Andric               init = vnew.p.ub;
16420b57cec5SDimitry Andric               vold.p.count = init + 1;
16430b57cec5SDimitry Andric #if KMP_ARCH_X86
16440b57cec5SDimitry Andric               KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b);
16450b57cec5SDimitry Andric #else
16460b57cec5SDimitry Andric               *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
16470b57cec5SDimitry Andric #endif
1648fe6060f1SDimitry Andric               // activate non-empty buffer and let others steal from us
1649fe6060f1SDimitry Andric               if (vold.p.count < (UT)vold.p.ub)
1650fe6060f1SDimitry Andric                 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
16510b57cec5SDimitry Andric               break;
16520b57cec5SDimitry Andric             } // if (check CAS result)
16535ffd83dbSDimitry Andric             KMP_CPU_PAUSE(); // CAS failed, repeatedly attempt
16540b57cec5SDimitry Andric           } // while (try to steal from particular victim)
16550b57cec5SDimitry Andric         } // while (search for victim)
16560b57cec5SDimitry Andric       } // if (try to find victim and steal)
16570b57cec5SDimitry Andric     } // if (4-byte induction variable)
16580b57cec5SDimitry Andric     if (!status) {
16590b57cec5SDimitry Andric       *p_lb = 0;
16600b57cec5SDimitry Andric       *p_ub = 0;
16610b57cec5SDimitry Andric       if (p_st != NULL)
16620b57cec5SDimitry Andric         *p_st = 0;
16630b57cec5SDimitry Andric     } else {
1664fe6060f1SDimitry Andric       start = pr->u.p.lb;
16650b57cec5SDimitry Andric       init *= chunk;
16660b57cec5SDimitry Andric       limit = chunk + init - 1;
16670b57cec5SDimitry Andric       incr = pr->u.p.st;
16680b57cec5SDimitry Andric       KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);
16690b57cec5SDimitry Andric 
16700b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(init <= trip);
1671fe6060f1SDimitry Andric       // keep track of done chunks for possible early exit from stealing
1672fe6060f1SDimitry Andric       // TODO: count executed chunks locally with rare update of shared location
1673fe6060f1SDimitry Andric       // test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
16740b57cec5SDimitry Andric       if ((last = (limit >= trip)) != 0)
16750b57cec5SDimitry Andric         limit = trip;
16760b57cec5SDimitry Andric       if (p_st != NULL)
16770b57cec5SDimitry Andric         *p_st = incr;
16780b57cec5SDimitry Andric 
16790b57cec5SDimitry Andric       if (incr == 1) {
16800b57cec5SDimitry Andric         *p_lb = start + init;
16810b57cec5SDimitry Andric         *p_ub = start + limit;
16820b57cec5SDimitry Andric       } else {
16830b57cec5SDimitry Andric         *p_lb = start + init * incr;
16840b57cec5SDimitry Andric         *p_ub = start + limit * incr;
16850b57cec5SDimitry Andric       }
16860b57cec5SDimitry Andric     } // if
16870b57cec5SDimitry Andric     break;
16880b57cec5SDimitry Andric   } // case
1689fe6060f1SDimitry Andric #endif // KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_balanced: {
    KD_TRACE(
        10,
        ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
         gtid));
    /* Each thread was pre-assigned a single contiguous range (lb..ub) at
       init time; u.p.count doubles as a "range already handed out" flag. */
    /* check if thread has any iteration to do */
    if ((status = !pr->u.p.count) != 0) {
      pr->u.p.count = 1; // mark the one-and-only chunk as consumed
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
      // parm1 was set at init time if this thread's range contains the
      // last iteration of the loop
      last = (pr->u.p.parm1 != 0);
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    } else { /* no iterations to do */
      // advance lb past ub so the stored range reads as empty
      pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
    }
  } // case
  break;
  case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was
                                 merged here */
  case kmp_sch_static_chunked: {
    T parm1;

    KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
                   "kmp_sch_static_[affinity|chunked] case\n",
                   gtid));
    parm1 = pr->u.p.parm1; // chunk size (in iterations)

    trip = pr->u.p.tc - 1; // index of the last iteration

    // Chunks are assigned round-robin: this thread takes chunks
    // tid, tid+nproc, tid+2*nproc, ...; u.p.count is how many rounds
    // this thread has already completed.
    init = parm1 * (pr->u.p.count + tid);

    if ((status = (init <= trip)) != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      limit = parm1 + init - 1; // last iteration index of this chunk

      // clip the final chunk to the trip count
      if ((last = (limit >= trip)) != 0)
        limit = trip;

      if (p_st != NULL)
        *p_st = incr;

      // advance by nproc rounds so the next call picks the following
      // round-robin chunk for this thread
      pr->u.p.count += nproc;

      // translate iteration indices into loop-variable bounds
      if (incr == 1) {
        *p_lb = start + init;
        *p_ub = start + limit;
      } else {
        *p_lb = start + init * incr;
        *p_ub = start + limit * incr;
      }

      // record the chunk's iteration range for ordered-clause bookkeeping
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } // if
  } // case
  break;
17490b57cec5SDimitry Andric 
  case kmp_sch_dynamic_chunked: {
    UT chunk_number;
    UT chunk_size = pr->u.p.parm1; // chunk size (in iterations)
    UT nchunks = pr->u.p.parm2; // total number of chunks in the loop

    KD_TRACE(
        100,
        ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
         gtid));

    // Atomically claim the next chunk index from the shared counter;
    // acquire semantics pair with other threads' releases of the counter.
    chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
    status = (chunk_number < nchunks);
    if (!status) {
      // all chunks already claimed; return an empty range
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } else {
      init = chunk_size * chunk_number; // first iteration of the chunk
      trip = pr->u.p.tc - 1; // index of the last iteration
      start = pr->u.p.lb;
      incr = pr->u.p.st;

      // last chunk may be short: clip it to the trip count
      if ((last = (trip - init < (UT)chunk_size)))
        limit = trip;
      else
        limit = chunk_size + init - 1;

      if (p_st != NULL)
        *p_st = incr;

      // translate iteration indices into loop-variable bounds
      if (incr == 1) {
        *p_lb = start + init;
        *p_ub = start + limit;
      } else {
        *p_lb = start + init * incr;
        *p_ub = start + limit * incr;
      }

      // record the chunk's iteration range for ordered-clause bookkeeping
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } // if
  } // case
  break;
17960b57cec5SDimitry Andric 
  case kmp_sch_guided_iterative_chunked: {
    T chunkspec = pr->u.p.parm1; // minimum chunk size requested by the user
    KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
                   "iterative case\n",
                   gtid));
    trip = pr->u.p.tc;
    // Guided schedule: chunk size shrinks proportionally to the remaining
    // iterations. The shared iteration counter is advanced with CAS; once
    // few iterations remain, fall back to plain dynamic chunking.
    // Start atomic part of calculations
    while (1) {
      ST remaining; // signed, because can be < 0
      init = sh->u.s.iteration; // shared value
      remaining = trip - init;
      if (remaining <= 0) { // AC: need to compare with 0 first
        // nothing to do, don't try atomic op
        status = 0;
        break;
      }
      if ((T)remaining <
          pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
        // use dynamic-style schedule
        // atomically increment iterations, get old value
        init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                 (ST)chunkspec);
        remaining = trip - init;
        if (remaining <= 0) {
          status = 0; // all iterations got by other threads
        } else {
          // got some iterations to work on
          status = 1;
          if ((T)remaining > chunkspec) {
            limit = init + chunkspec - 1;
          } else {
            last = true; // the last chunk
            limit = init + remaining - 1;
          } // if
        } // if
        break;
      } // if
      // parm3 holds a double (stored bitwise in the integer field) equal to
      // the guided shrink factor, i.e. multiplying by it divides by K*nproc
      limit = init + (UT)((double)remaining *
                          *(double *)&pr->u.p.parm3); // divide by K*nproc
      if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                               (ST)init, (ST)limit)) {
        // CAS was successful, chunk obtained
        status = 1;
        --limit; // limit is inclusive; CAS stored the exclusive end
        break;
      } // if
      // CAS lost the race: re-read the shared counter and retry
    } // while
    if (status != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      if (p_st != NULL)
        *p_st = incr;
      // translate iteration indices into loop-variable bounds
      *p_lb = start + init * incr;
      *p_ub = start + limit * incr;
      // record the chunk's iteration range for ordered-clause bookkeeping
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } else {
      // no iterations left; return an empty range
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } // if
  } // case
  break;
18630b57cec5SDimitry Andric 
  case kmp_sch_guided_simd: {
    // same as iterative but curr-chunk adjusted to be multiple of given
    // chunk
    T chunk = pr->u.p.parm1; // chunk granularity (e.g. SIMD width multiple)
    KD_TRACE(100,
             ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
              gtid));
    trip = pr->u.p.tc;
    // Start atomic part of calculations
    while (1) {
      ST remaining; // signed, because can be < 0
      init = sh->u.s.iteration; // shared value
      remaining = trip - init;
      if (remaining <= 0) { // AC: need to compare with 0 first
        status = 0; // nothing to do, don't try atomic op
        break;
      }
      // invariant: every claimed boundary stays chunk-aligned
      KMP_DEBUG_ASSERT(chunk && init % chunk == 0);
      // compare with K*nproc*(chunk+1), K=2 by default
      if ((T)remaining < pr->u.p.parm2) {
        // use dynamic-style schedule
        // atomically increment iterations, get old value
        init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                 (ST)chunk);
        remaining = trip - init;
        if (remaining <= 0) {
          status = 0; // all iterations got by other threads
        } else {
          // got some iterations to work on
          status = 1;
          if ((T)remaining > chunk) {
            limit = init + chunk - 1;
          } else {
            last = true; // the last chunk
            limit = init + remaining - 1;
          } // if
        } // if
        break;
      } // if
      // divide by K*nproc
      // parm3 holds a double (stored bitwise) equal to the guided shrink
      // factor; convert the scaled remaining count back to an integer span
      UT span;
      __kmp_type_convert((double)remaining * (*(double *)&pr->u.p.parm3),
                         &span);
      UT rem = span % chunk;
      if (rem) // adjust so that span%chunk == 0
        span += chunk - rem;
      limit = init + span;
      if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                               (ST)init, (ST)limit)) {
        // CAS was successful, chunk obtained
        status = 1;
        --limit; // limit is inclusive; CAS stored the exclusive end
        break;
      } // if
      // CAS lost the race: re-read the shared counter and retry
    } // while
    if (status != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      if (p_st != NULL)
        *p_st = incr;
      // translate iteration indices into loop-variable bounds
      *p_lb = start + init * incr;
      *p_ub = start + limit * incr;
      // record the chunk's iteration range for ordered-clause bookkeeping
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } else {
      // no iterations left; return an empty range
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } // if
  } // case
  break;
19380b57cec5SDimitry Andric 
  case kmp_sch_guided_analytical_chunked: {
    T chunkspec = pr->u.p.parm1; // minimum chunk size requested by the user
    UT chunkIdx;
#if KMP_USE_X87CONTROL
    /* for storing original FPCW value for Windows* OS on
       IA-32 architecture 8-byte version */
    unsigned int oldFpcw;
    unsigned int fpcwSet = 0;
#endif
    KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
                   "kmp_sch_guided_analytical_chunked case\n",
                   gtid));

    trip = pr->u.p.tc;

    // init-time code chose this schedule only for multi-thread teams with
    // enough iterations; these invariants are relied upon below
    KMP_DEBUG_ASSERT(nproc > 1);
    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip);

    while (1) { /* this while loop is a safeguard against unexpected zero
                   chunk sizes */
      // atomically claim the next chunk index from the shared counter
      chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
      if (chunkIdx >= (UT)pr->u.p.parm2) {
        // past the cross-over point (parm2, computed at init time):
        // remaining iterations are dealt in fixed-size chunks
        --trip;
        /* use dynamic-style scheduling */
        init = chunkIdx * chunkspec + pr->u.p.count;
        /* need to verify init > 0 in case of overflow in the above
         * calculation */
        if ((status = (init > 0 && init <= trip)) != 0) {
          limit = init + chunkspec - 1;

          if ((last = (limit >= trip)) != 0)
            limit = trip;
        }
        break;
      } else {
/* use exponential-style scheduling */
/* The following check is to workaround the lack of long double precision on
   Windows* OS.
   This check works around the possible effect that init != 0 for chunkIdx == 0.
 */
#if KMP_USE_X87CONTROL
        /* If we haven't already done so, save original
           FPCW and set precision to 64-bit, as Windows* OS
           on IA-32 architecture defaults to 53-bit */
        if (!fpcwSet) {
          oldFpcw = _control87(0, 0);
          _control87(_PC_64, _MCW_PC);
          fpcwSet = 0x30000;
        }
#endif
        if (chunkIdx) {
          // parm3 holds the per-chunk decay base as a DBL (stored bitwise);
          // compute iterations remaining before chunk chunkIdx begins
          init = __kmp_dispatch_guided_remaining<T>(
              trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
          KMP_DEBUG_ASSERT(init);
          init = trip - init;
        } else
          init = 0;
        // end of this chunk = start of the next one
        limit = trip - __kmp_dispatch_guided_remaining<T>(
                           trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
        KMP_ASSERT(init <= limit);
        if (init < limit) {
          KMP_DEBUG_ASSERT(limit <= trip);
          --limit; // limit is inclusive
          status = 1;
          break;
        } // if
        // zero-sized chunk (rounding artifact): loop and claim the next index
      } // if
    } // while (1)
#if KMP_USE_X87CONTROL
    /* restore FPCW if necessary
       AC: check fpcwSet flag first because oldFpcw can be uninitialized here
    */
    if (fpcwSet && (oldFpcw & fpcwSet))
      _control87(oldFpcw, _MCW_PC);
#endif
    if (status != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      if (p_st != NULL)
        *p_st = incr;
      // translate iteration indices into loop-variable bounds
      *p_lb = start + init * incr;
      *p_ub = start + limit * incr;
      // record the chunk's iteration range for ordered-clause bookkeeping
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      }
    } else {
      // no iterations left; return an empty range
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    }
  } // case
  break;
20330b57cec5SDimitry Andric 
20340b57cec5SDimitry Andric   case kmp_sch_trapezoidal: {
20350b57cec5SDimitry Andric     UT index;
20360b57cec5SDimitry Andric     T parm2 = pr->u.p.parm2;
20370b57cec5SDimitry Andric     T parm3 = pr->u.p.parm3;
20380b57cec5SDimitry Andric     T parm4 = pr->u.p.parm4;
20390b57cec5SDimitry Andric     KD_TRACE(100,
20400b57cec5SDimitry Andric              ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
20410b57cec5SDimitry Andric               gtid));
20420b57cec5SDimitry Andric 
20430b57cec5SDimitry Andric     index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
20440b57cec5SDimitry Andric 
20450b57cec5SDimitry Andric     init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
20460b57cec5SDimitry Andric     trip = pr->u.p.tc - 1;
20470b57cec5SDimitry Andric 
20480b57cec5SDimitry Andric     if ((status = ((T)index < parm3 && init <= trip)) == 0) {
20490b57cec5SDimitry Andric       *p_lb = 0;
20500b57cec5SDimitry Andric       *p_ub = 0;
20510b57cec5SDimitry Andric       if (p_st != NULL)
20520b57cec5SDimitry Andric         *p_st = 0;
20530b57cec5SDimitry Andric     } else {
20540b57cec5SDimitry Andric       start = pr->u.p.lb;
20550b57cec5SDimitry Andric       limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
20560b57cec5SDimitry Andric       incr = pr->u.p.st;
20570b57cec5SDimitry Andric 
20580b57cec5SDimitry Andric       if ((last = (limit >= trip)) != 0)
20590b57cec5SDimitry Andric         limit = trip;
20600b57cec5SDimitry Andric 
20610b57cec5SDimitry Andric       if (p_st != NULL)
20620b57cec5SDimitry Andric         *p_st = incr;
20630b57cec5SDimitry Andric 
20640b57cec5SDimitry Andric       if (incr == 1) {
20650b57cec5SDimitry Andric         *p_lb = start + init;
20660b57cec5SDimitry Andric         *p_ub = start + limit;
20670b57cec5SDimitry Andric       } else {
20680b57cec5SDimitry Andric         *p_lb = start + init * incr;
20690b57cec5SDimitry Andric         *p_ub = start + limit * incr;
20700b57cec5SDimitry Andric       }
20710b57cec5SDimitry Andric 
20720b57cec5SDimitry Andric       if (pr->flags.ordered) {
20730b57cec5SDimitry Andric         pr->u.p.ordered_lower = init;
20740b57cec5SDimitry Andric         pr->u.p.ordered_upper = limit;
20750b57cec5SDimitry Andric       } // if
20760b57cec5SDimitry Andric     } // if
20770b57cec5SDimitry Andric   } // case
20780b57cec5SDimitry Andric   break;
20790b57cec5SDimitry Andric   default: {
20800b57cec5SDimitry Andric     status = 0; // to avoid complaints on uninitialized variable use
20810b57cec5SDimitry Andric     __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
20820b57cec5SDimitry Andric                 KMP_HNT(GetNewerLibrary), // Hint
20830b57cec5SDimitry Andric                 __kmp_msg_null // Variadic argument list terminator
20840b57cec5SDimitry Andric     );
20850b57cec5SDimitry Andric   } break;
20860b57cec5SDimitry Andric   } // switch
20870b57cec5SDimitry Andric   if (p_last)
20880b57cec5SDimitry Andric     *p_last = last;
20890b57cec5SDimitry Andric #ifdef KMP_DEBUG
20900b57cec5SDimitry Andric   if (pr->flags.ordered) {
20910b57cec5SDimitry Andric     char *buff;
20920b57cec5SDimitry Andric     // create format specifiers before the debug output
20930b57cec5SDimitry Andric     buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
20940b57cec5SDimitry Andric                             "ordered_lower:%%%s ordered_upper:%%%s\n",
20950b57cec5SDimitry Andric                             traits_t<UT>::spec, traits_t<UT>::spec);
20960b57cec5SDimitry Andric     KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper));
20970b57cec5SDimitry Andric     __kmp_str_free(&buff);
20980b57cec5SDimitry Andric   }
20990b57cec5SDimitry Andric   {
21000b57cec5SDimitry Andric     char *buff;
21010b57cec5SDimitry Andric     // create format specifiers before the debug output
21020b57cec5SDimitry Andric     buff = __kmp_str_format(
21030b57cec5SDimitry Andric         "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
21040b57cec5SDimitry Andric         "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
21050b57cec5SDimitry Andric         traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2106fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(p_last);
2107fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(p_st);
21080b57cec5SDimitry Andric     KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st));
21090b57cec5SDimitry Andric     __kmp_str_free(&buff);
21100b57cec5SDimitry Andric   }
21110b57cec5SDimitry Andric #endif
21120b57cec5SDimitry Andric   return status;
21130b57cec5SDimitry Andric }
21140b57cec5SDimitry Andric 
/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
   work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
   is not called. */
// NOTE: OMPT_LOOP_END expands inside __kmp_dispatch_next() and reads the
// enclosing scope's `status`, `pr` and `codeptr` — it is not a standalone
// statement and must only be used there.
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_END                                                          \
  if (status == 0) {                                                           \
    if (ompt_enabled.ompt_callback_work) {                                     \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
      ompt_callbacks.ompt_callback(ompt_callback_work)(                        \
          ompt_get_work_schedule(pr->schedule), ompt_scope_end,                \
          &(team_info->parallel_data), &(task_info->task_data), 0, codeptr);   \
    }                                                                          \
  }
// Report the chunk [lb, ub] with stride st to the OMPT dispatch callback.
// No-op unless the tool registered ompt_callback_dispatch AND status is
// nonzero (i.e. a chunk was actually handed out). The chunk descriptor is a
// stack temporary, valid only for the duration of the callback.
#define OMPT_LOOP_DISPATCH(lb, ub, st, status)                                 \
  if (ompt_enabled.ompt_callback_dispatch && status) {                         \
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);                \
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);              \
    ompt_dispatch_chunk_t chunk;                                               \
    ompt_data_t instance = ompt_data_none;                                     \
    OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st);                                \
    instance.ptr = &chunk;                                                     \
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(                      \
        &(team_info->parallel_data), &(task_info->task_data),                  \
        ompt_dispatch_ws_loop_chunk, instance);                                \
  }
// TODO: implement count
#else
#define OMPT_LOOP_END // no-op
#define OMPT_LOOP_DISPATCH(lb, ub, st, status) // no-op
#endif
21460b57cec5SDimitry Andric 
#if KMP_STATS_ENABLED
// Count the iterations in the chunk just returned and record them in the
// OMP_loop_dynamic_iterations statistic. Expands inside
// __kmp_dispatch_next() and reads the enclosing scope's p_lb, p_ub, pr and
// status. When status == 0 (loop finished) it records zero iterations and
// pops the partitioned timer started for dynamic scheduling. The three
// arithmetic branches handle unit, negative and positive strides so the
// count t = floor(|ub-lb| / |st|) + 1 is computed without signed overflow
// in the common cases.
#define KMP_STATS_LOOP_END                                                     \
  {                                                                            \
    kmp_int64 u, l, t, i;                                                      \
    l = (kmp_int64)(*p_lb);                                                    \
    u = (kmp_int64)(*p_ub);                                                    \
    i = (kmp_int64)(pr->u.p.st);                                               \
    if (status == 0) {                                                         \
      t = 0;                                                                   \
      KMP_POP_PARTITIONED_TIMER();                                             \
    } else if (i == 1) {                                                       \
      if (u >= l)                                                              \
        t = u - l + 1;                                                         \
      else                                                                     \
        t = 0;                                                                 \
    } else if (i < 0) {                                                        \
      if (l >= u)                                                              \
        t = (l - u) / (-i) + 1;                                                \
      else                                                                     \
        t = 0;                                                                 \
    } else {                                                                   \
      if (u >= l)                                                              \
        t = (u - l) / i + 1;                                                   \
      else                                                                     \
        t = 0;                                                                 \
    }                                                                          \
    KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t);                           \
  }
#else
#define KMP_STATS_LOOP_END /* Nothing */
#endif
21780b57cec5SDimitry Andric 
/* Get the next chunk of iterations for the calling thread.

   Returns nonzero and fills *p_lb / *p_ub / *p_st when a chunk was obtained;
   returns zero when no iterations remain. *p_last (if non-NULL) is set
   nonzero when the returned chunk contains the sequentially last iteration.

   Two top-level paths:
   - team serialized: dispatch directly from the thread-private buffer
     (either chunk-by-chunk for nomerge schedules, or the whole remaining
     range at once);
   - active team: delegate to __kmp_dispatch_next_algorithm() (or the
     hierarchical scheduler when enabled) and, when every thread has
     observed loop completion, reset and release the shared dispatch buffer
     for reuse by a subsequent loop. */
template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
                               T *p_lb, T *p_ub,
                               typename traits_t<T>::signed_t *p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                               ,
                               void *codeptr
#endif
) {

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  // This is potentially slightly misleading, schedule(runtime) will appear here
  // even if the actual runtime schedule is static. (Which points out a
  // disadvantage of schedule(runtime): even when static scheduling is used it
  // costs more than a compile time choice to use static scheduling would.)
  KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);

  int status;
  dispatch_private_info_template<T> *pr;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;

  KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
  KD_TRACE(
      1000,
      ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
       gtid, p_lb, p_ub, p_st, p_last));

  if (team->t.t_serialized) {
    /* NOTE: serialize this dispatch because we are not at the active level */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
    KMP_DEBUG_ASSERT(pr);

    if ((status = (pr->u.p.tc != 0)) == 0) {
      // trip count exhausted: report an empty chunk and pop the workshare
      // from the consistency-check stack if one was pushed
      *p_lb = 0;
      *p_ub = 0;
      //            if ( p_last != NULL )
      //                *p_last = 0;
      if (p_st != NULL)
        *p_st = 0;
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }
    } else if (pr->flags.nomerge) {
      // nomerge schedule: even though serialized, hand out one chunk of
      // parm1 iterations per call, tracked by pr->u.p.count
      kmp_int32 last;
      T start;
      UT limit, trip, init;
      ST incr;
      T chunk = pr->u.p.parm1;

      KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                     gtid));

      init = chunk * pr->u.p.count++;
      trip = pr->u.p.tc - 1;

      if ((status = (init <= trip)) == 0) {
        *p_lb = 0;
        *p_ub = 0;
        //                if ( p_last != NULL )
        //                    *p_last = 0;
        if (p_st != NULL)
          *p_st = 0;
        if (__kmp_env_consistency_check) {
          if (pr->pushed_ws != ct_none) {
            pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
          }
        }
      } else {
        start = pr->u.p.lb;
        limit = chunk + init - 1;
        incr = pr->u.p.st;

        // clip the final chunk to the trip count
        if ((last = (limit >= trip)) != 0) {
          limit = trip;
#if KMP_OS_WINDOWS
          pr->u.p.last_upper = pr->u.p.ub;
#endif /* KMP_OS_WINDOWS */
        }
        if (p_last != NULL)
          *p_last = last;
        if (p_st != NULL)
          *p_st = incr;
        if (incr == 1) {
          *p_lb = start + init;
          *p_ub = start + limit;
        } else {
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
        }

        if (pr->flags.ordered) {
          pr->u.p.ordered_lower = init;
          pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
          {
            char *buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
#endif
        } // if
      } // if
    } else {
      // merged serialized case: return the entire remaining range as a
      // single last chunk and mark the loop finished (tc = 0) so the next
      // call reports no more work
      pr->u.p.tc = 0;
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
      pr->u.p.last_upper = *p_ub;
#endif /* KMP_OS_WINDOWS */
      if (p_last != NULL)
        *p_last = TRUE;
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    } // if
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format(
          "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
          "p_ub:%%%s p_st:%%%s p_last:%%p %%d  returning:%%d\n",
          traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
      KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
                    (p_last ? *p_last : 0), status));
      __kmp_str_free(&buff);
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
    OMPT_LOOP_END;
    KMP_STATS_LOOP_END;
    return status;
  } else {
    // active team: use the per-thread private buffer and the shared buffer
    // installed by dispatch init
    kmp_int32 last = 0;
    dispatch_shared_info_template<T> volatile *sh;

    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    KMP_DEBUG_ASSERT(pr);
    sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
        th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(sh);

#if KMP_USE_HIER_SCHED
    if (pr->flags.use_hier)
      status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
    else
#endif // KMP_USE_HIER_SCHED
      status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
                                                p_st, th->th.th_team_nproc,
                                                th->th.th_info.ds.ds_tid);
    // status == 0: no more iterations to execute
    if (status == 0) {
      ST num_done;
      // atomically count this thread among those done with the loop
      num_done = test_then_inc<ST>(&sh->u.s.num_done);
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
            traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, sh->u.s.num_done));
        __kmp_str_free(&buff);
      }
#endif

#if KMP_USE_HIER_SCHED
      pr->flags.use_hier = FALSE;
#endif
      // the last thread to finish resets the shared buffer for reuse
      if (num_done == th->th.th_team_nproc - 1) {
#if KMP_STATIC_STEAL_ENABLED
        if (pr->schedule == kmp_sch_static_steal) {
          int i;
          int idx = (th->th.th_dispatch->th_disp_index - 1) %
                    __kmp_dispatch_num_buffers; // current loop index
          // loop complete, safe to destroy locks used for stealing
          for (i = 0; i < th->th.th_team_nproc; ++i) {
            dispatch_private_info_template<T> *buf =
                reinterpret_cast<dispatch_private_info_template<T> *>(
                    &team->t.t_dispatch[i].th_disp_buffer[idx]);
            KMP_ASSERT(buf->steal_flag == THIEF); // buffer must be inactive
            KMP_ATOMIC_ST_RLX(&buf->steal_flag, UNUSED);
            // steal locks are only allocated for types wider than 4 bytes
            if (traits_t<T>::type_size > 4) {
              // destroy locks used for stealing
              kmp_lock_t *lck = buf->u.p.steal_lock;
              KMP_ASSERT(lck != NULL);
              __kmp_destroy_lock(lck);
              __kmp_free(lck);
              buf->u.p.steal_lock = NULL;
            }
          }
        }
#endif
        /* NOTE: release shared buffer to be reused */

        KMP_MB(); /* Flush all pending memory write invalidates.  */

        sh->u.s.num_done = 0;
        sh->u.s.iteration = 0;

        /* TODO replace with general release procedure? */
        if (pr->flags.ordered) {
          sh->u.s.ordered_iteration = 0;
        }

        KMP_MB(); /* Flush all pending memory write invalidates.  */

        // advancing buffer_index publishes the reset buffer; threads spin
        // on it elsewhere (see e.g. __kmp_wait on buffer_index in
        // __kmpc_sections_init), so the resets above must be visible first
        sh->buffer_index += __kmp_dispatch_num_buffers;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                       gtid, sh->buffer_index));

        KMP_MB(); /* Flush all pending memory write invalidates.  */

      } // if
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }

      // detach this thread from the finished loop's dispatch structures
      th->th.th_dispatch->th_deo_fcn = NULL;
      th->th.th_dispatch->th_dxo_fcn = NULL;
      th->th.th_dispatch->th_dispatch_sh_current = NULL;
      th->th.th_dispatch->th_dispatch_pr_current = NULL;
    } // if (status == 0)
#if KMP_OS_WINDOWS
    else if (last) {
      pr->u.p.last_upper = pr->u.p.ub;
    }
#endif /* KMP_OS_WINDOWS */
    if (p_last != NULL && status != 0)
      *p_last = last;
  } // if

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d normal case: "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
                  (p_last ? *p_last : 0), status));
    __kmp_str_free(&buff);
  }
#endif
#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_NEXT();
#endif
  OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
  OMPT_LOOP_END;
  KMP_STATS_LOOP_END;
  return status;
}
24510b57cec5SDimitry Andric 
2452753f127fSDimitry Andric /*!
2453753f127fSDimitry Andric @ingroup WORK_SHARING
2454753f127fSDimitry Andric @param loc  source location information
2455753f127fSDimitry Andric @param global_tid  global thread number
2456753f127fSDimitry Andric @return Zero if the parallel region is not active and this thread should execute
2457753f127fSDimitry Andric all sections, non-zero otherwise.
2458753f127fSDimitry Andric 
2459753f127fSDimitry Andric Beginning of sections construct.
2460753f127fSDimitry Andric There are no implicit barriers in the "sections" calls, rather the compiler
2461753f127fSDimitry Andric should introduce an explicit barrier if it is required.
2462753f127fSDimitry Andric 
2463753f127fSDimitry Andric This implementation is based on __kmp_dispatch_init, using same constructs for
2464753f127fSDimitry Andric shared data (we can't have sections nested directly in omp for loop, there
2465753f127fSDimitry Andric should be a parallel region in between)
2466753f127fSDimitry Andric */
2467753f127fSDimitry Andric kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {
2468753f127fSDimitry Andric 
2469753f127fSDimitry Andric   int active;
2470753f127fSDimitry Andric   kmp_info_t *th;
2471753f127fSDimitry Andric   kmp_team_t *team;
2472753f127fSDimitry Andric   kmp_uint32 my_buffer_index;
2473753f127fSDimitry Andric   dispatch_shared_info_template<kmp_int32> volatile *sh;
2474753f127fSDimitry Andric 
2475753f127fSDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
2476753f127fSDimitry Andric 
2477753f127fSDimitry Andric   if (!TCR_4(__kmp_init_parallel))
2478753f127fSDimitry Andric     __kmp_parallel_initialize();
2479753f127fSDimitry Andric   __kmp_resume_if_soft_paused();
2480753f127fSDimitry Andric 
2481753f127fSDimitry Andric   /* setup data */
2482753f127fSDimitry Andric   th = __kmp_threads[gtid];
2483753f127fSDimitry Andric   team = th->th.th_team;
2484753f127fSDimitry Andric   active = !team->t.t_serialized;
2485753f127fSDimitry Andric   th->th.th_ident = loc;
2486753f127fSDimitry Andric 
2487753f127fSDimitry Andric   KMP_COUNT_BLOCK(OMP_SECTIONS);
2488753f127fSDimitry Andric   KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid));
2489753f127fSDimitry Andric 
2490753f127fSDimitry Andric   if (active) {
2491753f127fSDimitry Andric     // Setup sections in the same way as dynamic scheduled loops.
2492753f127fSDimitry Andric     // We need one shared data: which section is to execute next.
2493753f127fSDimitry Andric     // (in case parallel is not active, all sections will be executed on the
2494753f127fSDimitry Andric     // same thread)
2495753f127fSDimitry Andric     KMP_DEBUG_ASSERT(th->th.th_dispatch ==
2496753f127fSDimitry Andric                      &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2497753f127fSDimitry Andric 
2498753f127fSDimitry Andric     my_buffer_index = th->th.th_dispatch->th_disp_index++;
2499753f127fSDimitry Andric 
2500753f127fSDimitry Andric     // reuse shared data structures from dynamic sched loops:
2501753f127fSDimitry Andric     sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
2502753f127fSDimitry Andric         &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
2503753f127fSDimitry Andric     KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
2504753f127fSDimitry Andric                   my_buffer_index));
2505753f127fSDimitry Andric 
2506753f127fSDimitry Andric     th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
2507753f127fSDimitry Andric     th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
2508753f127fSDimitry Andric 
2509753f127fSDimitry Andric     KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
2510753f127fSDimitry Andric                    "sh->buffer_index:%d\n",
2511753f127fSDimitry Andric                    gtid, my_buffer_index, sh->buffer_index));
2512753f127fSDimitry Andric     __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
2513753f127fSDimitry Andric                            __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
2514753f127fSDimitry Andric     // Note: KMP_WAIT() cannot be used there: buffer index and
2515753f127fSDimitry Andric     // my_buffer_index are *always* 32-bit integers.
2516753f127fSDimitry Andric     KMP_MB();
2517753f127fSDimitry Andric     KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
2518753f127fSDimitry Andric                    "sh->buffer_index:%d\n",
2519753f127fSDimitry Andric                    gtid, my_buffer_index, sh->buffer_index));
2520753f127fSDimitry Andric 
2521753f127fSDimitry Andric     th->th.th_dispatch->th_dispatch_pr_current =
2522753f127fSDimitry Andric         nullptr; // sections construct doesn't need private data
2523753f127fSDimitry Andric     th->th.th_dispatch->th_dispatch_sh_current =
2524753f127fSDimitry Andric         CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
2525753f127fSDimitry Andric   }
2526753f127fSDimitry Andric 
2527753f127fSDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
2528753f127fSDimitry Andric   if (ompt_enabled.ompt_callback_work) {
2529753f127fSDimitry Andric     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2530753f127fSDimitry Andric     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2531753f127fSDimitry Andric     ompt_callbacks.ompt_callback(ompt_callback_work)(
2532753f127fSDimitry Andric         ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
2533753f127fSDimitry Andric         &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2534753f127fSDimitry Andric   }
2535753f127fSDimitry Andric #endif
2536753f127fSDimitry Andric   KMP_PUSH_PARTITIONED_TIMER(OMP_sections);
2537753f127fSDimitry Andric 
2538753f127fSDimitry Andric   return active;
2539753f127fSDimitry Andric }
2540753f127fSDimitry Andric 
2541753f127fSDimitry Andric /*!
2542753f127fSDimitry Andric @ingroup WORK_SHARING
2543753f127fSDimitry Andric @param loc  source location information
2544753f127fSDimitry Andric @param global_tid  global thread number
2545753f127fSDimitry Andric @param numberOfSections  number of sections in the 'sections' construct
2546753f127fSDimitry Andric @return unsigned [from 0 to n) - number (id) of the section to execute next on
2547753f127fSDimitry Andric this thread. n (or any other number not in range) - nothing to execute on this
2548753f127fSDimitry Andric thread
2549753f127fSDimitry Andric */
2550753f127fSDimitry Andric 
2551753f127fSDimitry Andric kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
2552753f127fSDimitry Andric                               kmp_int32 numberOfSections) {
2553753f127fSDimitry Andric 
2554bdd1243dSDimitry Andric   KMP_TIME_PARTITIONED_BLOCK(OMP_sections_overhead);
2555753f127fSDimitry Andric 
2556753f127fSDimitry Andric   kmp_info_t *th = __kmp_threads[gtid];
2557753f127fSDimitry Andric #ifdef KMP_DEBUG
2558753f127fSDimitry Andric   kmp_team_t *team = th->th.th_team;
2559753f127fSDimitry Andric #endif
2560753f127fSDimitry Andric 
2561753f127fSDimitry Andric   KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
2562753f127fSDimitry Andric                   numberOfSections));
2563753f127fSDimitry Andric 
2564753f127fSDimitry Andric   // For serialized case we should not call this function:
2565753f127fSDimitry Andric   KMP_DEBUG_ASSERT(!team->t.t_serialized);
2566753f127fSDimitry Andric 
2567753f127fSDimitry Andric   dispatch_shared_info_template<kmp_int32> volatile *sh;
2568753f127fSDimitry Andric 
2569753f127fSDimitry Andric   KMP_DEBUG_ASSERT(th->th.th_dispatch ==
2570753f127fSDimitry Andric                    &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2571753f127fSDimitry Andric 
2572753f127fSDimitry Andric   KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
2573753f127fSDimitry Andric   sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
2574753f127fSDimitry Andric       th->th.th_dispatch->th_dispatch_sh_current);
2575753f127fSDimitry Andric   KMP_DEBUG_ASSERT(sh);
2576753f127fSDimitry Andric 
2577753f127fSDimitry Andric   kmp_int32 sectionIndex = 0;
2578753f127fSDimitry Andric   bool moreSectionsToExecute = true;
2579753f127fSDimitry Andric 
2580753f127fSDimitry Andric   // Find section to execute:
2581753f127fSDimitry Andric   sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
2582753f127fSDimitry Andric   if (sectionIndex >= numberOfSections) {
2583753f127fSDimitry Andric     moreSectionsToExecute = false;
2584753f127fSDimitry Andric   }
2585753f127fSDimitry Andric 
2586753f127fSDimitry Andric   // status == 0: no more sections to execute;
2587753f127fSDimitry Andric   // OMPTODO: __kmpc_end_sections could be bypassed?
2588753f127fSDimitry Andric   if (!moreSectionsToExecute) {
2589753f127fSDimitry Andric     kmp_int32 num_done;
2590753f127fSDimitry Andric 
2591753f127fSDimitry Andric     num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
2592753f127fSDimitry Andric 
2593753f127fSDimitry Andric     if (num_done == th->th.th_team_nproc - 1) {
2594753f127fSDimitry Andric       /* NOTE: release this buffer to be reused */
2595753f127fSDimitry Andric 
2596753f127fSDimitry Andric       KMP_MB(); /* Flush all pending memory write invalidates.  */
2597753f127fSDimitry Andric 
2598753f127fSDimitry Andric       sh->u.s.num_done = 0;
2599753f127fSDimitry Andric       sh->u.s.iteration = 0;
2600753f127fSDimitry Andric 
2601753f127fSDimitry Andric       KMP_MB(); /* Flush all pending memory write invalidates.  */
2602753f127fSDimitry Andric 
2603753f127fSDimitry Andric       sh->buffer_index += __kmp_dispatch_num_buffers;
2604753f127fSDimitry Andric       KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
2605753f127fSDimitry Andric                      sh->buffer_index));
2606753f127fSDimitry Andric 
2607753f127fSDimitry Andric       KMP_MB(); /* Flush all pending memory write invalidates.  */
2608753f127fSDimitry Andric 
2609753f127fSDimitry Andric     } // if
2610753f127fSDimitry Andric 
2611753f127fSDimitry Andric     th->th.th_dispatch->th_deo_fcn = NULL;
2612753f127fSDimitry Andric     th->th.th_dispatch->th_dxo_fcn = NULL;
2613753f127fSDimitry Andric     th->th.th_dispatch->th_dispatch_sh_current = NULL;
2614753f127fSDimitry Andric     th->th.th_dispatch->th_dispatch_pr_current = NULL;
2615753f127fSDimitry Andric 
2616753f127fSDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
2617753f127fSDimitry Andric     if (ompt_enabled.ompt_callback_dispatch) {
2618753f127fSDimitry Andric       ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2619753f127fSDimitry Andric       ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2620753f127fSDimitry Andric       ompt_data_t instance = ompt_data_none;
2621753f127fSDimitry Andric       instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
2622753f127fSDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
2623753f127fSDimitry Andric           &(team_info->parallel_data), &(task_info->task_data),
2624753f127fSDimitry Andric           ompt_dispatch_section, instance);
2625753f127fSDimitry Andric     }
2626753f127fSDimitry Andric #endif
2627753f127fSDimitry Andric   }
2628753f127fSDimitry Andric 
2629753f127fSDimitry Andric   return sectionIndex;
2630753f127fSDimitry Andric }
2631753f127fSDimitry Andric 
2632753f127fSDimitry Andric /*!
2633753f127fSDimitry Andric @ingroup WORK_SHARING
2634753f127fSDimitry Andric @param loc  source location information
2635753f127fSDimitry Andric @param global_tid  global thread number
2636753f127fSDimitry Andric 
2637753f127fSDimitry Andric End of "sections" construct.
2638753f127fSDimitry Andric Don't need to wait here: barrier is added separately when needed.
2639753f127fSDimitry Andric */
2640753f127fSDimitry Andric void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
2641753f127fSDimitry Andric 
2642753f127fSDimitry Andric   kmp_info_t *th = __kmp_threads[gtid];
2643753f127fSDimitry Andric   int active = !th->th.th_team->t.t_serialized;
2644753f127fSDimitry Andric 
2645753f127fSDimitry Andric   KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
2646753f127fSDimitry Andric 
2647753f127fSDimitry Andric   if (!active) {
2648753f127fSDimitry Andric     // In active case call finalization is done in __kmpc_next_section
2649753f127fSDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
2650753f127fSDimitry Andric     if (ompt_enabled.ompt_callback_work) {
2651753f127fSDimitry Andric       ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2652753f127fSDimitry Andric       ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2653753f127fSDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_work)(
2654753f127fSDimitry Andric           ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
2655753f127fSDimitry Andric           &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2656753f127fSDimitry Andric     }
2657753f127fSDimitry Andric #endif
2658753f127fSDimitry Andric   }
2659753f127fSDimitry Andric 
2660bdd1243dSDimitry Andric   KMP_POP_PARTITIONED_TIMER();
2661753f127fSDimitry Andric   KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
2662753f127fSDimitry Andric }
2663753f127fSDimitry Andric 
26640b57cec5SDimitry Andric template <typename T>
26650b57cec5SDimitry Andric static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
26660b57cec5SDimitry Andric                                   kmp_int32 *plastiter, T *plower, T *pupper,
26670b57cec5SDimitry Andric                                   typename traits_t<T>::signed_t incr) {
26680b57cec5SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
26690b57cec5SDimitry Andric   kmp_uint32 team_id;
26700b57cec5SDimitry Andric   kmp_uint32 nteams;
26710b57cec5SDimitry Andric   UT trip_count;
26720b57cec5SDimitry Andric   kmp_team_t *team;
26730b57cec5SDimitry Andric   kmp_info_t *th;
26740b57cec5SDimitry Andric 
26750b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(plastiter && plower && pupper);
26760b57cec5SDimitry Andric   KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
26770b57cec5SDimitry Andric #ifdef KMP_DEBUG
26780b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
26790b57cec5SDimitry Andric   {
26800b57cec5SDimitry Andric     char *buff;
26810b57cec5SDimitry Andric     // create format specifiers before the debug output
26820b57cec5SDimitry Andric     buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
26830b57cec5SDimitry Andric                             "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
26840b57cec5SDimitry Andric                             traits_t<T>::spec, traits_t<T>::spec,
26850b57cec5SDimitry Andric                             traits_t<ST>::spec, traits_t<T>::spec);
26860b57cec5SDimitry Andric     KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
26870b57cec5SDimitry Andric     __kmp_str_free(&buff);
26880b57cec5SDimitry Andric   }
26890b57cec5SDimitry Andric #endif
26900b57cec5SDimitry Andric 
26910b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
26920b57cec5SDimitry Andric     if (incr == 0) {
26930b57cec5SDimitry Andric       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
26940b57cec5SDimitry Andric                             loc);
26950b57cec5SDimitry Andric     }
26960b57cec5SDimitry Andric     if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
26970b57cec5SDimitry Andric       // The loop is illegal.
26980b57cec5SDimitry Andric       // Some zero-trip loops maintained by compiler, e.g.:
26990b57cec5SDimitry Andric       //   for(i=10;i<0;++i) // lower >= upper - run-time check
27000b57cec5SDimitry Andric       //   for(i=0;i>10;--i) // lower <= upper - run-time check
27010b57cec5SDimitry Andric       //   for(i=0;i>10;++i) // incr > 0       - compile-time check
27020b57cec5SDimitry Andric       //   for(i=10;i<0;--i) // incr < 0       - compile-time check
27030b57cec5SDimitry Andric       // Compiler does not check the following illegal loops:
27040b57cec5SDimitry Andric       //   for(i=0;i<10;i+=incr) // where incr<0
27050b57cec5SDimitry Andric       //   for(i=10;i>0;i-=incr) // where incr<0
27060b57cec5SDimitry Andric       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
27070b57cec5SDimitry Andric     }
27080b57cec5SDimitry Andric   }
2709e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
27100b57cec5SDimitry Andric   th = __kmp_threads[gtid];
27110b57cec5SDimitry Andric   team = th->th.th_team;
27120b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
27130b57cec5SDimitry Andric   nteams = th->th.th_teams_size.nteams;
27140b57cec5SDimitry Andric   team_id = team->t.t_master_tid;
27150b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
27160b57cec5SDimitry Andric 
27170b57cec5SDimitry Andric   // compute global trip count
27180b57cec5SDimitry Andric   if (incr == 1) {
27190b57cec5SDimitry Andric     trip_count = *pupper - *plower + 1;
27200b57cec5SDimitry Andric   } else if (incr == -1) {
27210b57cec5SDimitry Andric     trip_count = *plower - *pupper + 1;
27220b57cec5SDimitry Andric   } else if (incr > 0) {
27230b57cec5SDimitry Andric     // upper-lower can exceed the limit of signed type
27240b57cec5SDimitry Andric     trip_count = (UT)(*pupper - *plower) / incr + 1;
27250b57cec5SDimitry Andric   } else {
27260b57cec5SDimitry Andric     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
27270b57cec5SDimitry Andric   }
27280b57cec5SDimitry Andric 
27290b57cec5SDimitry Andric   if (trip_count <= nteams) {
27300b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(
27310b57cec5SDimitry Andric         __kmp_static == kmp_sch_static_greedy ||
27320b57cec5SDimitry Andric         __kmp_static ==
27330b57cec5SDimitry Andric             kmp_sch_static_balanced); // Unknown static scheduling type.
27340b57cec5SDimitry Andric     // only some teams get single iteration, others get nothing
27350b57cec5SDimitry Andric     if (team_id < trip_count) {
27360b57cec5SDimitry Andric       *pupper = *plower = *plower + team_id * incr;
27370b57cec5SDimitry Andric     } else {
27380b57cec5SDimitry Andric       *plower = *pupper + incr; // zero-trip loop
27390b57cec5SDimitry Andric     }
27400b57cec5SDimitry Andric     if (plastiter != NULL)
27410b57cec5SDimitry Andric       *plastiter = (team_id == trip_count - 1);
27420b57cec5SDimitry Andric   } else {
27430b57cec5SDimitry Andric     if (__kmp_static == kmp_sch_static_balanced) {
27440b57cec5SDimitry Andric       UT chunk = trip_count / nteams;
27450b57cec5SDimitry Andric       UT extras = trip_count % nteams;
27460b57cec5SDimitry Andric       *plower +=
27470b57cec5SDimitry Andric           incr * (team_id * chunk + (team_id < extras ? team_id : extras));
27480b57cec5SDimitry Andric       *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
27490b57cec5SDimitry Andric       if (plastiter != NULL)
27500b57cec5SDimitry Andric         *plastiter = (team_id == nteams - 1);
27510b57cec5SDimitry Andric     } else {
27520b57cec5SDimitry Andric       T chunk_inc_count =
27530b57cec5SDimitry Andric           (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
27540b57cec5SDimitry Andric       T upper = *pupper;
27550b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
27560b57cec5SDimitry Andric       // Unknown static scheduling type.
27570b57cec5SDimitry Andric       *plower += team_id * chunk_inc_count;
27580b57cec5SDimitry Andric       *pupper = *plower + chunk_inc_count - incr;
27590b57cec5SDimitry Andric       // Check/correct bounds if needed
27600b57cec5SDimitry Andric       if (incr > 0) {
27610b57cec5SDimitry Andric         if (*pupper < *plower)
27620b57cec5SDimitry Andric           *pupper = traits_t<T>::max_value;
27630b57cec5SDimitry Andric         if (plastiter != NULL)
27640b57cec5SDimitry Andric           *plastiter = *plower <= upper && *pupper > upper - incr;
27650b57cec5SDimitry Andric         if (*pupper > upper)
27660b57cec5SDimitry Andric           *pupper = upper; // tracker C73258
27670b57cec5SDimitry Andric       } else {
27680b57cec5SDimitry Andric         if (*pupper > *plower)
27690b57cec5SDimitry Andric           *pupper = traits_t<T>::min_value;
27700b57cec5SDimitry Andric         if (plastiter != NULL)
27710b57cec5SDimitry Andric           *plastiter = *plower >= upper && *pupper < upper - incr;
27720b57cec5SDimitry Andric         if (*pupper < upper)
27730b57cec5SDimitry Andric           *pupper = upper; // tracker C73258
27740b57cec5SDimitry Andric       }
27750b57cec5SDimitry Andric     }
27760b57cec5SDimitry Andric   }
27770b57cec5SDimitry Andric }
27780b57cec5SDimitry Andric 
27790b57cec5SDimitry Andric //-----------------------------------------------------------------------------
27800b57cec5SDimitry Andric // Dispatch routines
27810b57cec5SDimitry Andric //    Transfer call to template< type T >
27820b57cec5SDimitry Andric //    __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
27830b57cec5SDimitry Andric //                         T lb, T ub, ST st, ST chunk )
27840b57cec5SDimitry Andric extern "C" {
27850b57cec5SDimitry Andric 
27860b57cec5SDimitry Andric /*!
27870b57cec5SDimitry Andric @ingroup WORK_SHARING
27880b57cec5SDimitry Andric @{
27890b57cec5SDimitry Andric @param loc Source location
27900b57cec5SDimitry Andric @param gtid Global thread id
27910b57cec5SDimitry Andric @param schedule Schedule type
27920b57cec5SDimitry Andric @param lb  Lower bound
27930b57cec5SDimitry Andric @param ub  Upper bound
27940b57cec5SDimitry Andric @param st  Step (or increment if you prefer)
27950b57cec5SDimitry Andric @param chunk The chunk size to block with
27960b57cec5SDimitry Andric 
27970b57cec5SDimitry Andric This function prepares the runtime to start a dynamically scheduled for loop,
27980b57cec5SDimitry Andric saving the loop arguments.
27990b57cec5SDimitry Andric These functions are all identical apart from the types of the arguments.
28000b57cec5SDimitry Andric */
28010b57cec5SDimitry Andric 
28020b57cec5SDimitry Andric void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
28030b57cec5SDimitry Andric                             enum sched_type schedule, kmp_int32 lb,
28040b57cec5SDimitry Andric                             kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
28050b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28060b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28070b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28080b57cec5SDimitry Andric #endif
28090b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
28100b57cec5SDimitry Andric }
28110b57cec5SDimitry Andric /*!
28120b57cec5SDimitry Andric See @ref __kmpc_dispatch_init_4
28130b57cec5SDimitry Andric */
28140b57cec5SDimitry Andric void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
28150b57cec5SDimitry Andric                              enum sched_type schedule, kmp_uint32 lb,
28160b57cec5SDimitry Andric                              kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
28170b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28180b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28190b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28200b57cec5SDimitry Andric #endif
28210b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
28220b57cec5SDimitry Andric }
28230b57cec5SDimitry Andric 
28240b57cec5SDimitry Andric /*!
28250b57cec5SDimitry Andric See @ref __kmpc_dispatch_init_4
28260b57cec5SDimitry Andric */
28270b57cec5SDimitry Andric void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
28280b57cec5SDimitry Andric                             enum sched_type schedule, kmp_int64 lb,
28290b57cec5SDimitry Andric                             kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
28300b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28310b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28320b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28330b57cec5SDimitry Andric #endif
28340b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
28350b57cec5SDimitry Andric }
28360b57cec5SDimitry Andric 
28370b57cec5SDimitry Andric /*!
28380b57cec5SDimitry Andric See @ref __kmpc_dispatch_init_4
28390b57cec5SDimitry Andric */
28400b57cec5SDimitry Andric void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
28410b57cec5SDimitry Andric                              enum sched_type schedule, kmp_uint64 lb,
28420b57cec5SDimitry Andric                              kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
28430b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28440b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28450b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28460b57cec5SDimitry Andric #endif
28470b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
28480b57cec5SDimitry Andric }
28490b57cec5SDimitry Andric 
28500b57cec5SDimitry Andric /*!
28510b57cec5SDimitry Andric See @ref __kmpc_dispatch_init_4
28520b57cec5SDimitry Andric 
28530b57cec5SDimitry Andric Difference from __kmpc_dispatch_init set of functions is these functions
28540b57cec5SDimitry Andric are called for composite distribute parallel for construct. Thus before
28550b57cec5SDimitry Andric regular iterations dispatching we need to calc per-team iteration space.
28560b57cec5SDimitry Andric 
28570b57cec5SDimitry Andric These functions are all identical apart from the types of the arguments.
28580b57cec5SDimitry Andric */
28590b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
28600b57cec5SDimitry Andric                                  enum sched_type schedule, kmp_int32 *p_last,
28610b57cec5SDimitry Andric                                  kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
28620b57cec5SDimitry Andric                                  kmp_int32 chunk) {
28630b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28640b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28650b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28660b57cec5SDimitry Andric #endif
28670b57cec5SDimitry Andric   __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
28680b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
28690b57cec5SDimitry Andric }
28700b57cec5SDimitry Andric 
28710b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
28720b57cec5SDimitry Andric                                   enum sched_type schedule, kmp_int32 *p_last,
28730b57cec5SDimitry Andric                                   kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
28740b57cec5SDimitry Andric                                   kmp_int32 chunk) {
28750b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28760b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28770b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28780b57cec5SDimitry Andric #endif
28790b57cec5SDimitry Andric   __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
28800b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
28810b57cec5SDimitry Andric }
28820b57cec5SDimitry Andric 
28830b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
28840b57cec5SDimitry Andric                                  enum sched_type schedule, kmp_int32 *p_last,
28850b57cec5SDimitry Andric                                  kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
28860b57cec5SDimitry Andric                                  kmp_int64 chunk) {
28870b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
28880b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28890b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
28900b57cec5SDimitry Andric #endif
28910b57cec5SDimitry Andric   __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
28920b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
28930b57cec5SDimitry Andric }
28940b57cec5SDimitry Andric 
28950b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
28960b57cec5SDimitry Andric                                   enum sched_type schedule, kmp_int32 *p_last,
28970b57cec5SDimitry Andric                                   kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
28980b57cec5SDimitry Andric                                   kmp_int64 chunk) {
28990b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
29000b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29010b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
29020b57cec5SDimitry Andric #endif
29030b57cec5SDimitry Andric   __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
29040b57cec5SDimitry Andric   __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
29050b57cec5SDimitry Andric }
29060b57cec5SDimitry Andric 
29070b57cec5SDimitry Andric /*!
29080b57cec5SDimitry Andric @param loc Source code location
29090b57cec5SDimitry Andric @param gtid Global thread id
29100b57cec5SDimitry Andric @param p_last Pointer to a flag set to one if this is the last chunk or zero
29110b57cec5SDimitry Andric otherwise
29120b57cec5SDimitry Andric @param p_lb   Pointer to the lower bound for the next chunk of work
29130b57cec5SDimitry Andric @param p_ub   Pointer to the upper bound for the next chunk of work
29140b57cec5SDimitry Andric @param p_st   Pointer to the stride for the next chunk of work
29150b57cec5SDimitry Andric @return one if there is work to be done, zero otherwise
29160b57cec5SDimitry Andric 
29170b57cec5SDimitry Andric Get the next dynamically allocated chunk of work for this thread.
29180b57cec5SDimitry Andric If there is no more work, then the lb,ub and stride need not be modified.
29190b57cec5SDimitry Andric */
29200b57cec5SDimitry Andric int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
29210b57cec5SDimitry Andric                            kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
29220b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29230b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
29240b57cec5SDimitry Andric #endif
29250b57cec5SDimitry Andric   return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
29260b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29270b57cec5SDimitry Andric                                         ,
29280b57cec5SDimitry Andric                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
29290b57cec5SDimitry Andric #endif
29300b57cec5SDimitry Andric   );
29310b57cec5SDimitry Andric }
29320b57cec5SDimitry Andric 
29330b57cec5SDimitry Andric /*!
29340b57cec5SDimitry Andric See @ref __kmpc_dispatch_next_4
29350b57cec5SDimitry Andric */
29360b57cec5SDimitry Andric int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
29370b57cec5SDimitry Andric                             kmp_uint32 *p_lb, kmp_uint32 *p_ub,
29380b57cec5SDimitry Andric                             kmp_int32 *p_st) {
29390b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29400b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
29410b57cec5SDimitry Andric #endif
29420b57cec5SDimitry Andric   return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
29430b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29440b57cec5SDimitry Andric                                          ,
29450b57cec5SDimitry Andric                                          OMPT_LOAD_RETURN_ADDRESS(gtid)
29460b57cec5SDimitry Andric #endif
29470b57cec5SDimitry Andric   );
29480b57cec5SDimitry Andric }
29490b57cec5SDimitry Andric 
29500b57cec5SDimitry Andric /*!
29510b57cec5SDimitry Andric See @ref __kmpc_dispatch_next_4
29520b57cec5SDimitry Andric */
29530b57cec5SDimitry Andric int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
29540b57cec5SDimitry Andric                            kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
29550b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29560b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
29570b57cec5SDimitry Andric #endif
29580b57cec5SDimitry Andric   return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
29590b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29600b57cec5SDimitry Andric                                         ,
29610b57cec5SDimitry Andric                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
29620b57cec5SDimitry Andric #endif
29630b57cec5SDimitry Andric   );
29640b57cec5SDimitry Andric }
29650b57cec5SDimitry Andric 
29660b57cec5SDimitry Andric /*!
29670b57cec5SDimitry Andric See @ref __kmpc_dispatch_next_4
29680b57cec5SDimitry Andric */
29690b57cec5SDimitry Andric int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
29700b57cec5SDimitry Andric                             kmp_uint64 *p_lb, kmp_uint64 *p_ub,
29710b57cec5SDimitry Andric                             kmp_int64 *p_st) {
29720b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29730b57cec5SDimitry Andric   OMPT_STORE_RETURN_ADDRESS(gtid);
29740b57cec5SDimitry Andric #endif
29750b57cec5SDimitry Andric   return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
29760b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
29770b57cec5SDimitry Andric                                          ,
29780b57cec5SDimitry Andric                                          OMPT_LOAD_RETURN_ADDRESS(gtid)
29790b57cec5SDimitry Andric #endif
29800b57cec5SDimitry Andric   );
29810b57cec5SDimitry Andric }
29820b57cec5SDimitry Andric 
29830b57cec5SDimitry Andric /*!
29840b57cec5SDimitry Andric @param loc Source code location
29850b57cec5SDimitry Andric @param gtid Global thread id
29860b57cec5SDimitry Andric 
29870b57cec5SDimitry Andric Mark the end of a dynamic loop.
29880b57cec5SDimitry Andric */
29890b57cec5SDimitry Andric void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
29900b57cec5SDimitry Andric   __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
29910b57cec5SDimitry Andric }
29920b57cec5SDimitry Andric 
29930b57cec5SDimitry Andric /*!
29940b57cec5SDimitry Andric See @ref __kmpc_dispatch_fini_4
29950b57cec5SDimitry Andric */
29960b57cec5SDimitry Andric void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
29970b57cec5SDimitry Andric   __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
29980b57cec5SDimitry Andric }
29990b57cec5SDimitry Andric 
30000b57cec5SDimitry Andric /*!
30010b57cec5SDimitry Andric See @ref __kmpc_dispatch_fini_4
30020b57cec5SDimitry Andric */
30030b57cec5SDimitry Andric void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
30040b57cec5SDimitry Andric   __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
30050b57cec5SDimitry Andric }
30060b57cec5SDimitry Andric 
30070b57cec5SDimitry Andric /*!
30080b57cec5SDimitry Andric See @ref __kmpc_dispatch_fini_4
30090b57cec5SDimitry Andric */
30100b57cec5SDimitry Andric void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
30110b57cec5SDimitry Andric   __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
30120b57cec5SDimitry Andric }
3013*0fca6ea1SDimitry Andric 
/*!
@param loc Source code location
@param gtid Global thread id

Deinitialize a dispatch construct. Intentionally a no-op in this
implementation; the symbol is presumably exported so generated code can
call it unconditionally — NOTE(review): confirm against the compiler side.
*/
void __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 gtid) {}
30180b57cec5SDimitry Andric /*! @} */
30190b57cec5SDimitry Andric 
30200b57cec5SDimitry Andric //-----------------------------------------------------------------------------
30210b57cec5SDimitry Andric // Non-template routines from kmp_dispatch.cpp used in other sources
30220b57cec5SDimitry Andric 
30230b57cec5SDimitry Andric kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
30240b57cec5SDimitry Andric   return value == checker;
30250b57cec5SDimitry Andric }
30260b57cec5SDimitry Andric 
30270b57cec5SDimitry Andric kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
30280b57cec5SDimitry Andric   return value != checker;
30290b57cec5SDimitry Andric }
30300b57cec5SDimitry Andric 
30310b57cec5SDimitry Andric kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
30320b57cec5SDimitry Andric   return value < checker;
30330b57cec5SDimitry Andric }
30340b57cec5SDimitry Andric 
30350b57cec5SDimitry Andric kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
30360b57cec5SDimitry Andric   return value >= checker;
30370b57cec5SDimitry Andric }
30380b57cec5SDimitry Andric 
30390b57cec5SDimitry Andric kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
30400b57cec5SDimitry Andric   return value <= checker;
30410b57cec5SDimitry Andric }
30420b57cec5SDimitry Andric 
30430b57cec5SDimitry Andric kmp_uint32
30440b57cec5SDimitry Andric __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
30450b57cec5SDimitry Andric              kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
30460b57cec5SDimitry Andric              void *obj // Higher-level synchronization object, or NULL.
30470b57cec5SDimitry Andric ) {
30480b57cec5SDimitry Andric   // note: we may not belong to a team at this point
30490b57cec5SDimitry Andric   volatile kmp_uint32 *spin = spinner;
30500b57cec5SDimitry Andric   kmp_uint32 check = checker;
30510b57cec5SDimitry Andric   kmp_uint32 spins;
30520b57cec5SDimitry Andric   kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
30530b57cec5SDimitry Andric   kmp_uint32 r;
305404eeddc0SDimitry Andric   kmp_uint64 time;
30550b57cec5SDimitry Andric 
30560b57cec5SDimitry Andric   KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
30570b57cec5SDimitry Andric   KMP_INIT_YIELD(spins);
305804eeddc0SDimitry Andric   KMP_INIT_BACKOFF(time);
30590b57cec5SDimitry Andric   // main wait spin loop
30600b57cec5SDimitry Andric   while (!f(r = TCR_4(*spin), check)) {
30610b57cec5SDimitry Andric     KMP_FSYNC_SPIN_PREPARE(obj);
30620b57cec5SDimitry Andric     /* GEH - remove this since it was accidentally introduced when kmp_wait was
30630b57cec5SDimitry Andric        split. It causes problems with infinite recursion because of exit lock */
30640b57cec5SDimitry Andric     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
30650b57cec5SDimitry Andric         __kmp_abort_thread(); */
306604eeddc0SDimitry Andric     KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
30670b57cec5SDimitry Andric   }
30680b57cec5SDimitry Andric   KMP_FSYNC_SPIN_ACQUIRED(obj);
30690b57cec5SDimitry Andric   return r;
30700b57cec5SDimitry Andric }
30710b57cec5SDimitry Andric 
30720b57cec5SDimitry Andric void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
30730b57cec5SDimitry Andric                       kmp_uint32 (*pred)(void *, kmp_uint32),
30740b57cec5SDimitry Andric                       void *obj // Higher-level synchronization object, or NULL.
30750b57cec5SDimitry Andric ) {
30760b57cec5SDimitry Andric   // note: we may not belong to a team at this point
30770b57cec5SDimitry Andric   void *spin = spinner;
30780b57cec5SDimitry Andric   kmp_uint32 check = checker;
30790b57cec5SDimitry Andric   kmp_uint32 spins;
30800b57cec5SDimitry Andric   kmp_uint32 (*f)(void *, kmp_uint32) = pred;
308104eeddc0SDimitry Andric   kmp_uint64 time;
30820b57cec5SDimitry Andric 
30830b57cec5SDimitry Andric   KMP_FSYNC_SPIN_INIT(obj, spin);
30840b57cec5SDimitry Andric   KMP_INIT_YIELD(spins);
308504eeddc0SDimitry Andric   KMP_INIT_BACKOFF(time);
30860b57cec5SDimitry Andric   // main wait spin loop
30870b57cec5SDimitry Andric   while (!f(spin, check)) {
30880b57cec5SDimitry Andric     KMP_FSYNC_SPIN_PREPARE(obj);
30890b57cec5SDimitry Andric     /* if we have waited a bit, or are noversubscribed, yield */
30900b57cec5SDimitry Andric     /* pause is in the following code */
309104eeddc0SDimitry Andric     KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
30920b57cec5SDimitry Andric   }
30930b57cec5SDimitry Andric   KMP_FSYNC_SPIN_ACQUIRED(obj);
30940b57cec5SDimitry Andric }
30950b57cec5SDimitry Andric 
30960b57cec5SDimitry Andric } // extern "C"
30970b57cec5SDimitry Andric 
30980b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT
30990b57cec5SDimitry Andric 
void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  // GOMP-compatibility shim: forward to the templated dispatch initializer
  // instantiated for signed 32-bit loop bounds.
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}
31070b57cec5SDimitry Andric 
void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  // GOMP-compatibility shim: forward to the templated dispatch initializer
  // instantiated for unsigned 32-bit loop bounds (stride/chunk stay signed).
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}
31150b57cec5SDimitry Andric 
void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  // GOMP-compatibility shim: forward to the templated dispatch initializer
  // instantiated for signed 64-bit loop bounds.
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}
31230b57cec5SDimitry Andric 
void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  // GOMP-compatibility shim: forward to the templated dispatch initializer
  // instantiated for unsigned 64-bit loop bounds (stride/chunk stay signed).
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}
31310b57cec5SDimitry Andric 
void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  // GOMP-compatibility shim: finish the current chunk of a 4-byte loop
  // (note arg order: gtid, loc).
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}
31350b57cec5SDimitry Andric 
void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  // GOMP-compatibility shim: finish the current chunk of an 8-byte loop
  // (note arg order: gtid, loc).
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}
31390b57cec5SDimitry Andric 
void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  // GOMP-compatibility shim: unsigned 4-byte loops share the kmp_uint32
  // instantiation with the signed variant.
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}
31430b57cec5SDimitry Andric 
void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  // GOMP-compatibility shim: unsigned 8-byte loops share the kmp_uint64
  // instantiation with the signed variant.
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}
31470b57cec5SDimitry Andric 
31480b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */
31490b57cec5SDimitry Andric 
31500b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3151