/*
 * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Dynamic scheduling initialization and dispatch.
 *
 * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#if KMP_USE_X87CONTROL
#include <float.h>
#endif
#include "kmp_lock.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

// Consistency-check hook invoked on entry to an ordered section inside a
// dynamically scheduled worksharing (pdo) loop. When runtime consistency
// checking is enabled and the root is active with a worksharing construct
// pushed, this records the ordered region on the thread's sync stack so
// mismatched ordered usage can be diagnosed.
// gtid_ref: pointer to the caller's global thread id (must be non-NULL).
// cid_ref:  construct id; unused here (kept for the common callback signature).
// loc_ref:  source location of the ordered construct, for diagnostics.
void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(gtid_ref);

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_root->r.r_active &&
        (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
#if KMP_USE_DYNAMIC_LOCK
      // Dynamic-lock builds take an extra lock-sequence argument (0 == none).
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }
}

// Consistency-check hook invoked on exit from an ordered section inside a
// dynamically scheduled worksharing (pdo) loop: pops the sync-stack entry
// that the matching __kmp_dispatch_deo_error() call pushed.
// cid_ref is unused here (kept for the common callback signature).
void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  kmp_info_t *th;

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
      __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
    }
  }
}

// Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC
static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
                                         bool use_hier = false) {
  // Pick up the nonmonotonic/monotonic bits from the scheduling type
  // Nonmonotonic as default for dynamic schedule when no modifier is specified
  int monotonicity = SCHEDULE_NONMONOTONIC;

  // Let default be monotonic for executables
  // compiled with OpenMP* 4.5 or less compilers
  if (loc != NULL && loc->get_openmp_version() < 50)
    monotonicity = SCHEDULE_MONOTONIC;

  // Hierarchical scheduling and the KMP_FORCE_MONOTONIC_DYNAMIC_SCHEDULE
  // override both force monotonic; otherwise an explicit schedule modifier
  // (nonmonotonic: / monotonic:) wins over the defaults chosen above.
  if (use_hier || __kmp_force_monotonic)
    monotonicity = SCHEDULE_MONOTONIC;
  else if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    monotonicity = SCHEDULE_NONMONOTONIC;
  else if (SCHEDULE_HAS_MONOTONIC(schedule))
    monotonicity = SCHEDULE_MONOTONIC;

  return monotonicity;
}

#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
// Return floating point number rounded to two decimal points
static inline float __kmp_round_2decimal_val(float num) {
  return (float)(static_cast<int>(num * 100 + 0.5)) / 100;
}
// Round-half-away-from-zero to the nearest integer.
static inline int __kmp_get_round_val(float num) {
  return static_cast<int>(num < 0 ? num - 0.5 : num + 0.5);
}
#endif

// Compute this thread's share of a statically stolen loop: how many chunks it
// owns (small_chunk + p_extra + possibly one extra), and the index of its first
// chunk (init). Outputs are returned through the reference parameters:
//   init        - global index of the first chunk owned by thread `id`
//   small_chunk - base number of chunks per thread
//   extras      - number of leftover chunks distributed one-per-thread
//   p_extra     - additional chunks per P-core thread (0 on the uniform path
//                 and for E-core threads)
// team/id identify the calling thread; nchunks is the total chunk count and
// nproc the number of participating threads. The hybrid branch mirrors the
// skewed P-core/E-core split set up in __kmp_dispatch_init_algorithm.
template <typename T>
inline void
__kmp_initialize_self_buffer(kmp_team_t *team, T id,
                             dispatch_private_info_template<T> *pr,
                             typename traits_t<T>::unsigned_t nchunks, T nproc,
                             typename traits_t<T>::unsigned_t &init,
                             T &small_chunk, T &extras, T &p_extra) {

#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
  if (pr->flags.use_hybrid) {
    kmp_info_t *th = __kmp_threads[__kmp_gtid_from_tid((int)id, team)];
    kmp_hw_core_type_t type =
        (kmp_hw_core_type_t)th->th.th_topology_attrs.core_type;
    T pchunks = pr->u.p.pchunks;
    T echunks = nchunks - pchunks;
    T num_procs_with_pcore = pr->u.p.num_procs_with_pcore;
    T num_procs_with_ecore = nproc - num_procs_with_pcore;
    T first_thread_with_ecore = pr->u.p.first_thread_with_ecore;
    T big_chunk =
        pchunks / num_procs_with_pcore; // chunks per thread with p-core
    small_chunk =
        echunks / num_procs_with_ecore; // chunks per thread with e-core

    // Leftover chunks from both the P-core and E-core pools are handed out
    // one per thread, to the lowest thread ids first.
    extras =
        (pchunks % num_procs_with_pcore) + (echunks % num_procs_with_ecore);

    p_extra = (big_chunk - small_chunk);

    // Starting chunk index: each preceding thread contributes small_chunk,
    // plus p_extra for each preceding P-core thread, plus one for each
    // preceding thread that received an `extras` chunk.
    // NOTE(review): this assumes P-core threads occupy tids
    // [0, first_thread_with_ecore) followed by E-core threads — the same
    // layout assumed when the distribution was computed.
    if (type == KMP_HW_CORE_TYPE_CORE) {
      if (id < first_thread_with_ecore) {
        // All preceding threads are P-core.
        init = id * small_chunk + id * p_extra + (id < extras ? id : extras);
      } else {
        // Preceding threads include all E-core threads; only the P-core
        // ones (id - num_procs_with_ecore of them) contribute p_extra.
        init = id * small_chunk + (id - num_procs_with_ecore) * p_extra +
               (id < extras ? id : extras);
      }
    } else {
      if (id == first_thread_with_ecore) {
        // All preceding threads are P-core.
        init = id * small_chunk + id * p_extra + (id < extras ? id : extras);
      } else {
        // Exactly first_thread_with_ecore preceding threads are P-core.
        init = id * small_chunk + first_thread_with_ecore * p_extra +
               (id < extras ? id : extras);
      }
    }
    // Only P-core threads keep the p_extra bonus in their own allotment.
    p_extra = (type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
    return;
  }
#endif

  // Uniform (non-hybrid) path: even split with the remainder given to the
  // lowest thread ids.
  small_chunk = nchunks / nproc; // chunks per thread
  extras = nchunks % nproc;
  p_extra = 0;
  init = id * small_chunk + (id < extras ? id : extras);
}

#if KMP_STATIC_STEAL_ENABLED
enum { // values for steal_flag (possible states of private per-loop buffer)
  UNUSED = 0,
  CLAIMED = 1, // owner thread started initialization
  READY = 2, // available for stealing
  THIEF = 3 // finished by owner, or claimed by thief
  // possible state changes:
  // 0 -> 1 owner only, sync
  // 0 -> 3 thief only, sync
  // 1 -> 2 owner only, async
  // 2 -> 3 owner only, async
  // 3 -> 2 owner only, async
  // 3 -> 0 last thread finishing the loop, async
};
#endif

// Initialize a dispatch_private_info_template<T> buffer for a particular
// type of schedule,chunk. The loop description is found in lb (lower bound),
// ub (upper bound), and st (stride). nproc is the number of threads relevant
// to the scheduling (often the number of threads in a team, but not always if
// hierarchical scheduling is used). tid is the id of the thread calling
// the function within the group of nproc threads. It will have a value
// between 0 and nproc - 1. This is often just the thread id within a team, but
// is not necessarily the case when using hierarchical scheduling.
1810b57cec5SDimitry Andric // loc is the source file location of the corresponding loop 1820b57cec5SDimitry Andric // gtid is the global thread id 1830b57cec5SDimitry Andric template <typename T> 1840b57cec5SDimitry Andric void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, 1850b57cec5SDimitry Andric dispatch_private_info_template<T> *pr, 1860b57cec5SDimitry Andric enum sched_type schedule, T lb, T ub, 1870b57cec5SDimitry Andric typename traits_t<T>::signed_t st, 1880b57cec5SDimitry Andric #if USE_ITT_BUILD 1890b57cec5SDimitry Andric kmp_uint64 *cur_chunk, 1900b57cec5SDimitry Andric #endif 1910b57cec5SDimitry Andric typename traits_t<T>::signed_t chunk, 1920b57cec5SDimitry Andric T nproc, T tid) { 1930b57cec5SDimitry Andric typedef typename traits_t<T>::unsigned_t UT; 1940b57cec5SDimitry Andric typedef typename traits_t<T>::floating_t DBL; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric int active; 1970b57cec5SDimitry Andric T tc; 1980b57cec5SDimitry Andric kmp_info_t *th; 1990b57cec5SDimitry Andric kmp_team_t *team; 2000b57cec5SDimitry Andric int monotonicity; 2010b57cec5SDimitry Andric bool use_hier; 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric #ifdef KMP_DEBUG 2040b57cec5SDimitry Andric typedef typename traits_t<T>::signed_t ST; 2050b57cec5SDimitry Andric { 2060b57cec5SDimitry Andric char *buff; 2070b57cec5SDimitry Andric // create format specifiers before the debug output 2080b57cec5SDimitry Andric buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called " 2090b57cec5SDimitry Andric "pr:%%p lb:%%%s ub:%%%s st:%%%s " 2100b57cec5SDimitry Andric "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n", 2110b57cec5SDimitry Andric traits_t<T>::spec, traits_t<T>::spec, 2120b57cec5SDimitry Andric traits_t<ST>::spec, traits_t<ST>::spec, 2130b57cec5SDimitry Andric traits_t<T>::spec, traits_t<T>::spec); 2140b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid)); 2150b57cec5SDimitry Andric 
__kmp_str_free(&buff); 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric #endif 2180b57cec5SDimitry Andric /* setup data */ 2190b57cec5SDimitry Andric th = __kmp_threads[gtid]; 2200b57cec5SDimitry Andric team = th->th.th_team; 2210b57cec5SDimitry Andric active = !team->t.t_serialized; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric #if USE_ITT_BUILD 2240b57cec5SDimitry Andric int itt_need_metadata_reporting = 2250b57cec5SDimitry Andric __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && 2260b57cec5SDimitry Andric KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL && 2270b57cec5SDimitry Andric team->t.t_active_level == 1; 2280b57cec5SDimitry Andric #endif 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 2310b57cec5SDimitry Andric use_hier = pr->flags.use_hier; 2320b57cec5SDimitry Andric #else 2330b57cec5SDimitry Andric use_hier = false; 2340b57cec5SDimitry Andric #endif 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric /* Pick up the nonmonotonic/monotonic bits from the scheduling type */ 237e8d8bef9SDimitry Andric monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier); 2380b57cec5SDimitry Andric schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule); 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric /* Pick up the nomerge/ordered bits from the scheduling type */ 2410b57cec5SDimitry Andric if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) { 2420b57cec5SDimitry Andric pr->flags.nomerge = TRUE; 2430b57cec5SDimitry Andric schedule = 2440b57cec5SDimitry Andric (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); 2450b57cec5SDimitry Andric } else { 2460b57cec5SDimitry Andric pr->flags.nomerge = FALSE; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric pr->type_size = traits_t<T>::type_size; // remember the size of variables 2490b57cec5SDimitry Andric if (kmp_ord_lower & schedule) { 2500b57cec5SDimitry Andric pr->flags.ordered = TRUE; 2510b57cec5SDimitry Andric schedule = 
2520b57cec5SDimitry Andric (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); 2530b57cec5SDimitry Andric } else { 2540b57cec5SDimitry Andric pr->flags.ordered = FALSE; 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric // Ordered overrides nonmonotonic 2570b57cec5SDimitry Andric if (pr->flags.ordered) { 2580b57cec5SDimitry Andric monotonicity = SCHEDULE_MONOTONIC; 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric if (schedule == kmp_sch_static) { 2620b57cec5SDimitry Andric schedule = __kmp_static; 2630b57cec5SDimitry Andric } else { 2640b57cec5SDimitry Andric if (schedule == kmp_sch_runtime) { 2650b57cec5SDimitry Andric // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if 2660b57cec5SDimitry Andric // not specified) 2670b57cec5SDimitry Andric schedule = team->t.t_sched.r_sched_type; 268e8d8bef9SDimitry Andric monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier); 2690b57cec5SDimitry Andric schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule); 270fe6060f1SDimitry Andric if (pr->flags.ordered) // correct monotonicity for ordered loop if needed 271fe6060f1SDimitry Andric monotonicity = SCHEDULE_MONOTONIC; 2720b57cec5SDimitry Andric // Detail the schedule if needed (global controls are differentiated 2730b57cec5SDimitry Andric // appropriately) 2740b57cec5SDimitry Andric if (schedule == kmp_sch_guided_chunked) { 2750b57cec5SDimitry Andric schedule = __kmp_guided; 2760b57cec5SDimitry Andric } else if (schedule == kmp_sch_static) { 2770b57cec5SDimitry Andric schedule = __kmp_static; 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric // Use the chunk size specified by OMP_SCHEDULE (or default if not 2800b57cec5SDimitry Andric // specified) 2810b57cec5SDimitry Andric chunk = team->t.t_sched.chunk; 2820b57cec5SDimitry Andric #if USE_ITT_BUILD 2830b57cec5SDimitry Andric if (cur_chunk) 2840b57cec5SDimitry Andric *cur_chunk = chunk; 2850b57cec5SDimitry Andric #endif 2860b57cec5SDimitry Andric 
#ifdef KMP_DEBUG 2870b57cec5SDimitry Andric { 2880b57cec5SDimitry Andric char *buff; 2890b57cec5SDimitry Andric // create format specifiers before the debug output 2900b57cec5SDimitry Andric buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: " 2910b57cec5SDimitry Andric "schedule:%%d chunk:%%%s\n", 2920b57cec5SDimitry Andric traits_t<ST>::spec); 2930b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, schedule, chunk)); 2940b57cec5SDimitry Andric __kmp_str_free(&buff); 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric #endif 2970b57cec5SDimitry Andric } else { 2980b57cec5SDimitry Andric if (schedule == kmp_sch_guided_chunked) { 2990b57cec5SDimitry Andric schedule = __kmp_guided; 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric if (chunk <= 0) { 3020b57cec5SDimitry Andric chunk = KMP_DEFAULT_CHUNK; 3030b57cec5SDimitry Andric } 3040b57cec5SDimitry Andric } 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric if (schedule == kmp_sch_auto) { 3070b57cec5SDimitry Andric // mapping and differentiation: in the __kmp_do_serial_initialize() 3080b57cec5SDimitry Andric schedule = __kmp_auto; 3090b57cec5SDimitry Andric #ifdef KMP_DEBUG 3100b57cec5SDimitry Andric { 3110b57cec5SDimitry Andric char *buff; 3120b57cec5SDimitry Andric // create format specifiers before the debug output 3130b57cec5SDimitry Andric buff = __kmp_str_format( 3140b57cec5SDimitry Andric "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: " 3150b57cec5SDimitry Andric "schedule:%%d chunk:%%%s\n", 3160b57cec5SDimitry Andric traits_t<ST>::spec); 3170b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, schedule, chunk)); 3180b57cec5SDimitry Andric __kmp_str_free(&buff); 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric #endif 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 3230b57cec5SDimitry Andric // map nonmonotonic:dynamic to static steal 3240b57cec5SDimitry Andric if (schedule == kmp_sch_dynamic_chunked) { 3250b57cec5SDimitry Andric if 
(monotonicity == SCHEDULE_NONMONOTONIC) 3260b57cec5SDimitry Andric schedule = kmp_sch_static_steal; 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric #endif 3290b57cec5SDimitry Andric /* guided analytical not safe for too many threads */ 3300b57cec5SDimitry Andric if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) { 3310b57cec5SDimitry Andric schedule = kmp_sch_guided_iterative_chunked; 3320b57cec5SDimitry Andric KMP_WARNING(DispatchManyThreads); 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric if (schedule == kmp_sch_runtime_simd) { 3350b57cec5SDimitry Andric // compiler provides simd_width in the chunk parameter 3360b57cec5SDimitry Andric schedule = team->t.t_sched.r_sched_type; 337e8d8bef9SDimitry Andric monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier); 3380b57cec5SDimitry Andric schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule); 3390b57cec5SDimitry Andric // Detail the schedule if needed (global controls are differentiated 3400b57cec5SDimitry Andric // appropriately) 3410b57cec5SDimitry Andric if (schedule == kmp_sch_static || schedule == kmp_sch_auto || 3420b57cec5SDimitry Andric schedule == __kmp_static) { 3430b57cec5SDimitry Andric schedule = kmp_sch_static_balanced_chunked; 3440b57cec5SDimitry Andric } else { 3450b57cec5SDimitry Andric if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) { 3460b57cec5SDimitry Andric schedule = kmp_sch_guided_simd; 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric chunk = team->t.t_sched.chunk * chunk; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric #if USE_ITT_BUILD 3510b57cec5SDimitry Andric if (cur_chunk) 3520b57cec5SDimitry Andric *cur_chunk = chunk; 3530b57cec5SDimitry Andric #endif 3540b57cec5SDimitry Andric #ifdef KMP_DEBUG 3550b57cec5SDimitry Andric { 3560b57cec5SDimitry Andric char *buff; 3570b57cec5SDimitry Andric // create format specifiers before the debug output 3580b57cec5SDimitry Andric buff = __kmp_str_format( 3590b57cec5SDimitry Andric 
"__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d" 3600b57cec5SDimitry Andric " chunk:%%%s\n", 3610b57cec5SDimitry Andric traits_t<ST>::spec); 3620b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, schedule, chunk)); 3630b57cec5SDimitry Andric __kmp_str_free(&buff); 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric #endif 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric pr->u.p.parm1 = chunk; 3680b57cec5SDimitry Andric } 3690b57cec5SDimitry Andric KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper), 3700b57cec5SDimitry Andric "unknown scheduling type"); 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric pr->u.p.count = 0; 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 3750b57cec5SDimitry Andric if (st == 0) { 3760b57cec5SDimitry Andric __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, 3770b57cec5SDimitry Andric (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc); 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric // compute trip count 3810b57cec5SDimitry Andric if (st == 1) { // most common case 3820b57cec5SDimitry Andric if (ub >= lb) { 3830b57cec5SDimitry Andric tc = ub - lb + 1; 3840b57cec5SDimitry Andric } else { // ub < lb 3850b57cec5SDimitry Andric tc = 0; // zero-trip 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric } else if (st < 0) { 3880b57cec5SDimitry Andric if (lb >= ub) { 3890b57cec5SDimitry Andric // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B), 3900b57cec5SDimitry Andric // where the division needs to be unsigned regardless of the result type 3910b57cec5SDimitry Andric tc = (UT)(lb - ub) / (-st) + 1; 3920b57cec5SDimitry Andric } else { // lb < ub 3930b57cec5SDimitry Andric tc = 0; // zero-trip 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric } else { // st > 0 3960b57cec5SDimitry Andric if (ub >= lb) { 3970b57cec5SDimitry Andric // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; 
i+=1B), 3980b57cec5SDimitry Andric // where the division needs to be unsigned regardless of the result type 3990b57cec5SDimitry Andric tc = (UT)(ub - lb) / st + 1; 4000b57cec5SDimitry Andric } else { // ub < lb 4010b57cec5SDimitry Andric tc = 0; // zero-trip 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric } 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric #if KMP_STATS_ENABLED 4060b57cec5SDimitry Andric if (KMP_MASTER_GTID(gtid)) { 4070b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_loop_dynamic_total_iterations, tc); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric #endif 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric pr->u.p.lb = lb; 4120b57cec5SDimitry Andric pr->u.p.ub = ub; 4130b57cec5SDimitry Andric pr->u.p.st = st; 4140b57cec5SDimitry Andric pr->u.p.tc = tc; 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric #if KMP_OS_WINDOWS 4170b57cec5SDimitry Andric pr->u.p.last_upper = ub + st; 4180b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric /* NOTE: only the active parallel region(s) has active ordered sections */ 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric if (active) { 4230b57cec5SDimitry Andric if (pr->flags.ordered) { 4240b57cec5SDimitry Andric pr->ordered_bumped = 0; 4250b57cec5SDimitry Andric pr->u.p.ordered_lower = 1; 4260b57cec5SDimitry Andric pr->u.p.ordered_upper = 0; 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric switch (schedule) { 431fe6060f1SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 4320b57cec5SDimitry Andric case kmp_sch_static_steal: { 4335f757f3fSDimitry Andric T ntc, init = 0; 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric KD_TRACE(100, 4360b57cec5SDimitry Andric ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n", 4370b57cec5SDimitry Andric gtid)); 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric ntc = (tc % chunk ? 
1 : 0) + tc / chunk; 4400b57cec5SDimitry Andric if (nproc > 1 && ntc >= nproc) { 4410b57cec5SDimitry Andric KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL); 4420b57cec5SDimitry Andric T id = tid; 4435f757f3fSDimitry Andric T small_chunk, extras, p_extra = 0; 444fe6060f1SDimitry Andric kmp_uint32 old = UNUSED; 445fe6060f1SDimitry Andric int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED); 446fe6060f1SDimitry Andric if (traits_t<T>::type_size > 4) { 447fe6060f1SDimitry Andric // AC: TODO: check if 16-byte CAS available and use it to 448fe6060f1SDimitry Andric // improve performance (probably wait for explicit request 449fe6060f1SDimitry Andric // before spending time on this). 450fe6060f1SDimitry Andric // For now use dynamically allocated per-private-buffer lock, 451fe6060f1SDimitry Andric // free memory in __kmp_dispatch_next when status==0. 452fe6060f1SDimitry Andric pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t)); 453fe6060f1SDimitry Andric __kmp_init_lock(pr->u.p.steal_lock); 454fe6060f1SDimitry Andric } 4555f757f3fSDimitry Andric 4565f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED 4575f757f3fSDimitry Andric // Iterations are divided in a 60/40 skewed distribution among CORE and 4585f757f3fSDimitry Andric // ATOM processors for hybrid systems 4595f757f3fSDimitry Andric bool use_hybrid = false; 4605f757f3fSDimitry Andric kmp_hw_core_type_t core_type = KMP_HW_CORE_TYPE_UNKNOWN; 4615f757f3fSDimitry Andric T first_thread_with_ecore = 0; 4625f757f3fSDimitry Andric T num_procs_with_pcore = 0; 4635f757f3fSDimitry Andric T num_procs_with_ecore = 0; 4645f757f3fSDimitry Andric T p_ntc = 0, e_ntc = 0; 4655f757f3fSDimitry Andric if (__kmp_is_hybrid_cpu() && __kmp_affinity.type != affinity_none && 4665f757f3fSDimitry Andric __kmp_affinity.type != affinity_explicit) { 4675f757f3fSDimitry Andric use_hybrid = true; 4685f757f3fSDimitry Andric core_type = (kmp_hw_core_type_t)th->th.th_topology_attrs.core_type; 4695f757f3fSDimitry Andric if 
(core_type != KMP_HW_CORE_TYPE_UNKNOWN && 4705f757f3fSDimitry Andric __kmp_first_osid_with_ecore > -1) { 4715f757f3fSDimitry Andric for (int i = 0; i < team->t.t_nproc; ++i) { 4725f757f3fSDimitry Andric kmp_hw_core_type_t type = (kmp_hw_core_type_t)team->t.t_threads[i] 4735f757f3fSDimitry Andric ->th.th_topology_attrs.core_type; 4745f757f3fSDimitry Andric int id = team->t.t_threads[i]->th.th_topology_ids.os_id; 4755f757f3fSDimitry Andric if (id == __kmp_first_osid_with_ecore) { 4765f757f3fSDimitry Andric first_thread_with_ecore = 4775f757f3fSDimitry Andric team->t.t_threads[i]->th.th_info.ds.ds_tid; 4785f757f3fSDimitry Andric } 4795f757f3fSDimitry Andric if (type == KMP_HW_CORE_TYPE_CORE) { 4805f757f3fSDimitry Andric num_procs_with_pcore++; 4815f757f3fSDimitry Andric } else if (type == KMP_HW_CORE_TYPE_ATOM) { 4825f757f3fSDimitry Andric num_procs_with_ecore++; 4835f757f3fSDimitry Andric } else { 4845f757f3fSDimitry Andric use_hybrid = false; 4855f757f3fSDimitry Andric break; 4865f757f3fSDimitry Andric } 4875f757f3fSDimitry Andric } 4885f757f3fSDimitry Andric } 4895f757f3fSDimitry Andric if (num_procs_with_pcore > 0 && num_procs_with_ecore > 0) { 4905f757f3fSDimitry Andric float multiplier = 60.0 / 40.0; 4915f757f3fSDimitry Andric float p_ratio = (float)num_procs_with_pcore / nproc; 4925f757f3fSDimitry Andric float e_ratio = (float)num_procs_with_ecore / nproc; 4935f757f3fSDimitry Andric float e_multiplier = 4945f757f3fSDimitry Andric (float)1 / 4955f757f3fSDimitry Andric (((multiplier * num_procs_with_pcore) / nproc) + e_ratio); 4965f757f3fSDimitry Andric float p_multiplier = multiplier * e_multiplier; 4975f757f3fSDimitry Andric p_ntc = __kmp_get_round_val(ntc * p_ratio * p_multiplier); 4985f757f3fSDimitry Andric if ((int)p_ntc > (int)(ntc * p_ratio * p_multiplier)) 4995f757f3fSDimitry Andric e_ntc = 5005f757f3fSDimitry Andric (int)(__kmp_round_2decimal_val(ntc * e_ratio * e_multiplier)); 5015f757f3fSDimitry Andric else 5025f757f3fSDimitry Andric e_ntc = 
__kmp_get_round_val(ntc * e_ratio * e_multiplier); 5035f757f3fSDimitry Andric KMP_DEBUG_ASSERT(ntc == p_ntc + e_ntc); 5045f757f3fSDimitry Andric 5055f757f3fSDimitry Andric // Use regular static steal if not enough chunks for skewed 5065f757f3fSDimitry Andric // distribution 5075f757f3fSDimitry Andric use_hybrid = (use_hybrid && (p_ntc >= num_procs_with_pcore && 5085f757f3fSDimitry Andric e_ntc >= num_procs_with_ecore) 5095f757f3fSDimitry Andric ? true 5105f757f3fSDimitry Andric : false); 5115f757f3fSDimitry Andric } else { 5125f757f3fSDimitry Andric use_hybrid = false; 5135f757f3fSDimitry Andric } 5145f757f3fSDimitry Andric } 5155f757f3fSDimitry Andric pr->flags.use_hybrid = use_hybrid; 5165f757f3fSDimitry Andric pr->u.p.pchunks = p_ntc; 5175f757f3fSDimitry Andric pr->u.p.num_procs_with_pcore = num_procs_with_pcore; 5185f757f3fSDimitry Andric pr->u.p.first_thread_with_ecore = first_thread_with_ecore; 5195f757f3fSDimitry Andric 5205f757f3fSDimitry Andric if (use_hybrid) { 5215f757f3fSDimitry Andric KMP_DEBUG_ASSERT(nproc == num_procs_with_pcore + num_procs_with_ecore); 5225f757f3fSDimitry Andric T big_chunk = p_ntc / num_procs_with_pcore; 5235f757f3fSDimitry Andric small_chunk = e_ntc / num_procs_with_ecore; 5245f757f3fSDimitry Andric 5255f757f3fSDimitry Andric extras = 5265f757f3fSDimitry Andric (p_ntc % num_procs_with_pcore) + (e_ntc % num_procs_with_ecore); 5275f757f3fSDimitry Andric 5285f757f3fSDimitry Andric p_extra = (big_chunk - small_chunk); 5295f757f3fSDimitry Andric 5305f757f3fSDimitry Andric if (core_type == KMP_HW_CORE_TYPE_CORE) { 5315f757f3fSDimitry Andric if (id < first_thread_with_ecore) { 5325f757f3fSDimitry Andric init = 5335f757f3fSDimitry Andric id * small_chunk + id * p_extra + (id < extras ? id : extras); 5345f757f3fSDimitry Andric } else { 5355f757f3fSDimitry Andric init = id * small_chunk + (id - num_procs_with_ecore) * p_extra + 5365f757f3fSDimitry Andric (id < extras ? 
id : extras); 5375f757f3fSDimitry Andric } 5385f757f3fSDimitry Andric } else { 5395f757f3fSDimitry Andric if (id == first_thread_with_ecore) { 5405f757f3fSDimitry Andric init = 5415f757f3fSDimitry Andric id * small_chunk + id * p_extra + (id < extras ? id : extras); 5425f757f3fSDimitry Andric } else { 5435f757f3fSDimitry Andric init = id * small_chunk + first_thread_with_ecore * p_extra + 5445f757f3fSDimitry Andric (id < extras ? id : extras); 5455f757f3fSDimitry Andric } 5465f757f3fSDimitry Andric } 5475f757f3fSDimitry Andric p_extra = (core_type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0; 5485f757f3fSDimitry Andric } else 5495f757f3fSDimitry Andric #endif 5505f757f3fSDimitry Andric { 5510b57cec5SDimitry Andric small_chunk = ntc / nproc; 5520b57cec5SDimitry Andric extras = ntc % nproc; 5530b57cec5SDimitry Andric init = id * small_chunk + (id < extras ? id : extras); 5545f757f3fSDimitry Andric p_extra = 0; 5555f757f3fSDimitry Andric } 5560b57cec5SDimitry Andric pr->u.p.count = init; 557fe6060f1SDimitry Andric if (claimed) { // are we succeeded in claiming own buffer? 5585f757f3fSDimitry Andric pr->u.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0); 559fe6060f1SDimitry Andric // Other threads will inspect steal_flag when searching for a victim. 560fe6060f1SDimitry Andric // READY means other threads may steal from this thread from now on. 561fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, READY); 562fe6060f1SDimitry Andric } else { 563fe6060f1SDimitry Andric // other thread has stolen whole our range 564fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(pr->steal_flag == THIEF); 565fe6060f1SDimitry Andric pr->u.p.ub = init; // mark there is no iterations to work on 5660b57cec5SDimitry Andric } 567fe6060f1SDimitry Andric pr->u.p.parm2 = ntc; // save number of chunks 568fe6060f1SDimitry Andric // parm3 is the number of times to attempt stealing which is 569fe6060f1SDimitry Andric // nproc (just a heuristics, could be optimized later on). 
570fe6060f1SDimitry Andric pr->u.p.parm3 = nproc; 571fe6060f1SDimitry Andric pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid 5720b57cec5SDimitry Andric break; 5730b57cec5SDimitry Andric } else { 574480093f4SDimitry Andric /* too few chunks: switching to kmp_sch_dynamic_chunked */ 575480093f4SDimitry Andric schedule = kmp_sch_dynamic_chunked; 576480093f4SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to " 577480093f4SDimitry Andric "kmp_sch_dynamic_chunked\n", 5780b57cec5SDimitry Andric gtid)); 579fe6060f1SDimitry Andric goto dynamic_init; 580480093f4SDimitry Andric break; 5810b57cec5SDimitry Andric } // if 5820b57cec5SDimitry Andric } // case 5830b57cec5SDimitry Andric #endif 5840b57cec5SDimitry Andric case kmp_sch_static_balanced: { 5850b57cec5SDimitry Andric T init, limit; 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric KD_TRACE( 5880b57cec5SDimitry Andric 100, 5890b57cec5SDimitry Andric ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n", 5900b57cec5SDimitry Andric gtid)); 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric if (nproc > 1) { 5930b57cec5SDimitry Andric T id = tid; 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric if (tc < nproc) { 5960b57cec5SDimitry Andric if (id < tc) { 5970b57cec5SDimitry Andric init = id; 5980b57cec5SDimitry Andric limit = id; 5990b57cec5SDimitry Andric pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ 6000b57cec5SDimitry Andric } else { 6010b57cec5SDimitry Andric pr->u.p.count = 1; /* means no more chunks to execute */ 6020b57cec5SDimitry Andric pr->u.p.parm1 = FALSE; 6030b57cec5SDimitry Andric break; 6040b57cec5SDimitry Andric } 6050b57cec5SDimitry Andric } else { 6060b57cec5SDimitry Andric T small_chunk = tc / nproc; 6070b57cec5SDimitry Andric T extras = tc % nproc; 6080b57cec5SDimitry Andric init = id * small_chunk + (id < extras ? id : extras); 6090b57cec5SDimitry Andric limit = init + small_chunk - (id < extras ? 
0 : 1); 6100b57cec5SDimitry Andric pr->u.p.parm1 = (id == nproc - 1); 6110b57cec5SDimitry Andric } 6120b57cec5SDimitry Andric } else { 6130b57cec5SDimitry Andric if (tc > 0) { 6140b57cec5SDimitry Andric init = 0; 6150b57cec5SDimitry Andric limit = tc - 1; 6160b57cec5SDimitry Andric pr->u.p.parm1 = TRUE; 6170b57cec5SDimitry Andric } else { 6180b57cec5SDimitry Andric // zero trip count 6190b57cec5SDimitry Andric pr->u.p.count = 1; /* means no more chunks to execute */ 6200b57cec5SDimitry Andric pr->u.p.parm1 = FALSE; 6210b57cec5SDimitry Andric break; 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric #if USE_ITT_BUILD 6250b57cec5SDimitry Andric // Calculate chunk for metadata report 6260b57cec5SDimitry Andric if (itt_need_metadata_reporting) 6270b57cec5SDimitry Andric if (cur_chunk) 6280b57cec5SDimitry Andric *cur_chunk = limit - init + 1; 6290b57cec5SDimitry Andric #endif 6300b57cec5SDimitry Andric if (st == 1) { 6310b57cec5SDimitry Andric pr->u.p.lb = lb + init; 6320b57cec5SDimitry Andric pr->u.p.ub = lb + limit; 6330b57cec5SDimitry Andric } else { 6340b57cec5SDimitry Andric // calculated upper bound, "ub" is user-defined upper bound 6350b57cec5SDimitry Andric T ub_tmp = lb + limit * st; 6360b57cec5SDimitry Andric pr->u.p.lb = lb + init * st; 6370b57cec5SDimitry Andric // adjust upper bound to "ub" if needed, so that MS lastprivate will match 6380b57cec5SDimitry Andric // it exactly 6390b57cec5SDimitry Andric if (st > 0) { 6400b57cec5SDimitry Andric pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp); 6410b57cec5SDimitry Andric } else { 6420b57cec5SDimitry Andric pr->u.p.ub = (ub_tmp + st < ub ? 
ub : ub_tmp); 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric } 6450b57cec5SDimitry Andric if (pr->flags.ordered) { 6460b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 6470b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 6480b57cec5SDimitry Andric } 6490b57cec5SDimitry Andric break; 6500b57cec5SDimitry Andric } // case 6510b57cec5SDimitry Andric case kmp_sch_static_balanced_chunked: { 6520b57cec5SDimitry Andric // similar to balanced, but chunk adjusted to multiple of simd width 6530b57cec5SDimitry Andric T nth = nproc; 6540b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)" 6550b57cec5SDimitry Andric " -> falling-through to static_greedy\n", 6560b57cec5SDimitry Andric gtid)); 6570b57cec5SDimitry Andric schedule = kmp_sch_static_greedy; 6580b57cec5SDimitry Andric if (nth > 1) 6590b57cec5SDimitry Andric pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1); 6600b57cec5SDimitry Andric else 6610b57cec5SDimitry Andric pr->u.p.parm1 = tc; 6620b57cec5SDimitry Andric break; 6630b57cec5SDimitry Andric } // case 6640b57cec5SDimitry Andric case kmp_sch_guided_simd: 6650b57cec5SDimitry Andric case kmp_sch_guided_iterative_chunked: { 6660b57cec5SDimitry Andric KD_TRACE( 6670b57cec5SDimitry Andric 100, 6680b57cec5SDimitry Andric ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked" 6690b57cec5SDimitry Andric " case\n", 6700b57cec5SDimitry Andric gtid)); 6710b57cec5SDimitry Andric 6720b57cec5SDimitry Andric if (nproc > 1) { 6730b57cec5SDimitry Andric if ((2L * chunk + 1) * nproc >= tc) { 6740b57cec5SDimitry Andric /* chunk size too large, switch to dynamic */ 6750b57cec5SDimitry Andric schedule = kmp_sch_dynamic_chunked; 676fe6060f1SDimitry Andric goto dynamic_init; 6770b57cec5SDimitry Andric } else { 6780b57cec5SDimitry Andric // when remaining iters become less than parm2 - switch to dynamic 6790b57cec5SDimitry Andric pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1); 
6800b57cec5SDimitry Andric *(double *)&pr->u.p.parm3 = 681e8d8bef9SDimitry Andric guided_flt_param / (double)nproc; // may occupy parm3 and parm4 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric } else { 6840b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " 6850b57cec5SDimitry Andric "kmp_sch_static_greedy\n", 6860b57cec5SDimitry Andric gtid)); 6870b57cec5SDimitry Andric schedule = kmp_sch_static_greedy; 6880b57cec5SDimitry Andric /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ 6890b57cec5SDimitry Andric KD_TRACE( 6900b57cec5SDimitry Andric 100, 6910b57cec5SDimitry Andric ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n", 6920b57cec5SDimitry Andric gtid)); 6930b57cec5SDimitry Andric pr->u.p.parm1 = tc; 6940b57cec5SDimitry Andric } // if 6950b57cec5SDimitry Andric } // case 6960b57cec5SDimitry Andric break; 6970b57cec5SDimitry Andric case kmp_sch_guided_analytical_chunked: { 6980b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d " 6990b57cec5SDimitry Andric "kmp_sch_guided_analytical_chunked case\n", 7000b57cec5SDimitry Andric gtid)); 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric if (nproc > 1) { 7030b57cec5SDimitry Andric if ((2L * chunk + 1) * nproc >= tc) { 7040b57cec5SDimitry Andric /* chunk size too large, switch to dynamic */ 7050b57cec5SDimitry Andric schedule = kmp_sch_dynamic_chunked; 706fe6060f1SDimitry Andric goto dynamic_init; 7070b57cec5SDimitry Andric } else { 7080b57cec5SDimitry Andric /* commonly used term: (2 nproc - 1)/(2 nproc) */ 7090b57cec5SDimitry Andric DBL x; 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric #if KMP_USE_X87CONTROL 7120b57cec5SDimitry Andric /* Linux* OS already has 64-bit computation by default for long double, 7130b57cec5SDimitry Andric and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. 
On 7140b57cec5SDimitry Andric Windows* OS on IA-32 architecture, we need to set precision to 64-bit 7150b57cec5SDimitry Andric instead of the default 53-bit. Even though long double doesn't work 7160b57cec5SDimitry Andric on Windows* OS on Intel(R) 64, the resulting lack of precision is not 7170b57cec5SDimitry Andric expected to impact the correctness of the algorithm, but this has not 7180b57cec5SDimitry Andric been mathematically proven. */ 7190b57cec5SDimitry Andric // save original FPCW and set precision to 64-bit, as 7200b57cec5SDimitry Andric // Windows* OS on IA-32 architecture defaults to 53-bit 7210b57cec5SDimitry Andric unsigned int oldFpcw = _control87(0, 0); 7220b57cec5SDimitry Andric _control87(_PC_64, _MCW_PC); // 0,0x30000 7230b57cec5SDimitry Andric #endif 7240b57cec5SDimitry Andric /* value used for comparison in solver for cross-over point */ 725349cc55cSDimitry Andric KMP_ASSERT(tc > 0); 7260b57cec5SDimitry Andric long double target = ((long double)chunk * 2 + 1) * nproc / tc; 7270b57cec5SDimitry Andric 7280b57cec5SDimitry Andric /* crossover point--chunk indexes equal to or greater than 7290b57cec5SDimitry Andric this point switch to dynamic-style scheduling */ 7300b57cec5SDimitry Andric UT cross; 7310b57cec5SDimitry Andric 7320b57cec5SDimitry Andric /* commonly used term: (2 nproc - 1)/(2 nproc) */ 733e8d8bef9SDimitry Andric x = 1.0 - 0.5 / (double)nproc; 7340b57cec5SDimitry Andric 7350b57cec5SDimitry Andric #ifdef KMP_DEBUG 7360b57cec5SDimitry Andric { // test natural alignment 7370b57cec5SDimitry Andric struct _test_a { 7380b57cec5SDimitry Andric char a; 7390b57cec5SDimitry Andric union { 7400b57cec5SDimitry Andric char b; 7410b57cec5SDimitry Andric DBL d; 7420b57cec5SDimitry Andric }; 7430b57cec5SDimitry Andric } t; 7440b57cec5SDimitry Andric ptrdiff_t natural_alignment = 7450b57cec5SDimitry Andric (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; 7460b57cec5SDimitry Andric //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long 
7470b57cec5SDimitry Andric // long)natural_alignment ); 7480b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 7490b57cec5SDimitry Andric (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric #endif // KMP_DEBUG 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric /* save the term in thread private dispatch structure */ 7540b57cec5SDimitry Andric *(DBL *)&pr->u.p.parm3 = x; 7550b57cec5SDimitry Andric 7560b57cec5SDimitry Andric /* solve for the crossover point to the nearest integer i for which C_i 7570b57cec5SDimitry Andric <= chunk */ 7580b57cec5SDimitry Andric { 7590b57cec5SDimitry Andric UT left, right, mid; 7600b57cec5SDimitry Andric long double p; 7610b57cec5SDimitry Andric 7620b57cec5SDimitry Andric /* estimate initial upper and lower bound */ 7630b57cec5SDimitry Andric 7640b57cec5SDimitry Andric /* doesn't matter what value right is as long as it is positive, but 7650b57cec5SDimitry Andric it affects performance of the solver */ 7660b57cec5SDimitry Andric right = 229; 7670b57cec5SDimitry Andric p = __kmp_pow<UT>(x, right); 7680b57cec5SDimitry Andric if (p > target) { 7690b57cec5SDimitry Andric do { 7700b57cec5SDimitry Andric p *= p; 7710b57cec5SDimitry Andric right <<= 1; 7720b57cec5SDimitry Andric } while (p > target && right < (1 << 27)); 7730b57cec5SDimitry Andric /* lower bound is previous (failed) estimate of upper bound */ 7740b57cec5SDimitry Andric left = right >> 1; 7750b57cec5SDimitry Andric } else { 7760b57cec5SDimitry Andric left = 0; 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric /* bisection root-finding method */ 7800b57cec5SDimitry Andric while (left + 1 < right) { 7810b57cec5SDimitry Andric mid = (left + right) / 2; 7820b57cec5SDimitry Andric if (__kmp_pow<UT>(x, mid) > target) { 7830b57cec5SDimitry Andric left = mid; 7840b57cec5SDimitry Andric } else { 7850b57cec5SDimitry Andric right = mid; 7860b57cec5SDimitry Andric } 7870b57cec5SDimitry Andric } // 
while 7880b57cec5SDimitry Andric cross = right; 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric /* assert sanity of computed crossover point */ 7910b57cec5SDimitry Andric KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target && 7920b57cec5SDimitry Andric __kmp_pow<UT>(x, cross) <= target); 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric /* save the crossover point in thread private dispatch structure */ 7950b57cec5SDimitry Andric pr->u.p.parm2 = cross; 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric // C75803 7980b57cec5SDimitry Andric #if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8)) 7990b57cec5SDimitry Andric #define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3) 8000b57cec5SDimitry Andric #else 8010b57cec5SDimitry Andric #define GUIDED_ANALYTICAL_WORKAROUND (x) 8020b57cec5SDimitry Andric #endif 8030b57cec5SDimitry Andric /* dynamic-style scheduling offset */ 804fe6060f1SDimitry Andric pr->u.p.count = tc - 805fe6060f1SDimitry Andric __kmp_dispatch_guided_remaining( 8060b57cec5SDimitry Andric tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - 8070b57cec5SDimitry Andric cross * chunk; 8080b57cec5SDimitry Andric #if KMP_USE_X87CONTROL 8090b57cec5SDimitry Andric // restore FPCW 8100b57cec5SDimitry Andric _control87(oldFpcw, _MCW_PC); 8110b57cec5SDimitry Andric #endif 8120b57cec5SDimitry Andric } // if 8130b57cec5SDimitry Andric } else { 8140b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " 8150b57cec5SDimitry Andric "kmp_sch_static_greedy\n", 8160b57cec5SDimitry Andric gtid)); 8170b57cec5SDimitry Andric schedule = kmp_sch_static_greedy; 8180b57cec5SDimitry Andric /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ 8190b57cec5SDimitry Andric pr->u.p.parm1 = tc; 8200b57cec5SDimitry Andric } // if 8210b57cec5SDimitry Andric } // case 8220b57cec5SDimitry Andric break; 8230b57cec5SDimitry Andric case kmp_sch_static_greedy: 8240b57cec5SDimitry Andric KD_TRACE( 
8250b57cec5SDimitry Andric 100, 8260b57cec5SDimitry Andric ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n", 8270b57cec5SDimitry Andric gtid)); 8280b57cec5SDimitry Andric pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc; 8290b57cec5SDimitry Andric break; 8300b57cec5SDimitry Andric case kmp_sch_static_chunked: 8310b57cec5SDimitry Andric case kmp_sch_dynamic_chunked: 832fe6060f1SDimitry Andric dynamic_init: 833349cc55cSDimitry Andric if (tc == 0) 834349cc55cSDimitry Andric break; 835fe6060f1SDimitry Andric if (pr->u.p.parm1 <= 0) 8360b57cec5SDimitry Andric pr->u.p.parm1 = KMP_DEFAULT_CHUNK; 837fe6060f1SDimitry Andric else if (pr->u.p.parm1 > tc) 838fe6060f1SDimitry Andric pr->u.p.parm1 = tc; 839fe6060f1SDimitry Andric // Store the total number of chunks to prevent integer overflow during 840fe6060f1SDimitry Andric // bounds calculations in the get next chunk routine. 841fe6060f1SDimitry Andric pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0); 8420b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d " 8430b57cec5SDimitry Andric "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", 8440b57cec5SDimitry Andric gtid)); 8450b57cec5SDimitry Andric break; 8460b57cec5SDimitry Andric case kmp_sch_trapezoidal: { 8470b57cec5SDimitry Andric /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ 8480b57cec5SDimitry Andric 8490b57cec5SDimitry Andric T parm1, parm2, parm3, parm4; 8500b57cec5SDimitry Andric KD_TRACE(100, 8510b57cec5SDimitry Andric ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n", 8520b57cec5SDimitry Andric gtid)); 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric parm1 = chunk; 8550b57cec5SDimitry Andric 8560b57cec5SDimitry Andric /* F : size of the first cycle */ 8570b57cec5SDimitry Andric parm2 = (tc / (2 * nproc)); 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric if (parm2 < 1) { 8600b57cec5SDimitry Andric parm2 = 1; 8610b57cec5SDimitry Andric } 
8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric /* L : size of the last cycle. Make sure the last cycle is not larger 8640b57cec5SDimitry Andric than the first cycle. */ 8650b57cec5SDimitry Andric if (parm1 < 1) { 8660b57cec5SDimitry Andric parm1 = 1; 8670b57cec5SDimitry Andric } else if (parm1 > parm2) { 8680b57cec5SDimitry Andric parm1 = parm2; 8690b57cec5SDimitry Andric } 8700b57cec5SDimitry Andric 8710b57cec5SDimitry Andric /* N : number of cycles */ 8720b57cec5SDimitry Andric parm3 = (parm2 + parm1); 8730b57cec5SDimitry Andric parm3 = (2 * tc + parm3 - 1) / parm3; 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric if (parm3 < 2) { 8760b57cec5SDimitry Andric parm3 = 2; 8770b57cec5SDimitry Andric } 8780b57cec5SDimitry Andric 8790b57cec5SDimitry Andric /* sigma : decreasing incr of the trapezoid */ 8800b57cec5SDimitry Andric parm4 = (parm3 - 1); 8810b57cec5SDimitry Andric parm4 = (parm2 - parm1) / parm4; 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric // pointless check, because parm4 >= 0 always 8840b57cec5SDimitry Andric // if ( parm4 < 0 ) { 8850b57cec5SDimitry Andric // parm4 = 0; 8860b57cec5SDimitry Andric //} 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric pr->u.p.parm1 = parm1; 8890b57cec5SDimitry Andric pr->u.p.parm2 = parm2; 8900b57cec5SDimitry Andric pr->u.p.parm3 = parm3; 8910b57cec5SDimitry Andric pr->u.p.parm4 = parm4; 8920b57cec5SDimitry Andric } // case 8930b57cec5SDimitry Andric break; 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric default: { 8960b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message 8970b57cec5SDimitry Andric KMP_HNT(GetNewerLibrary), // Hint 8980b57cec5SDimitry Andric __kmp_msg_null // Variadic argument list terminator 8990b57cec5SDimitry Andric ); 9000b57cec5SDimitry Andric } break; 9010b57cec5SDimitry Andric } // switch 9020b57cec5SDimitry Andric pr->schedule = schedule; 9030b57cec5SDimitry Andric } 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric #if 
KMP_USE_HIER_SCHED
// Runtime entry points for initializing the scheduling hierarchy for a loop
// with the given bounds/stride. Note the chunk-array choice below: the 32-bit
// specializations pass __kmp_hier_scheds.small_chunks while the 64-bit ones
// pass __kmp_hier_scheds.large_chunks.
template <typename T>
inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
                                             typename traits_t<T>::signed_t st);
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
                                            kmp_int32 ub, kmp_int32 st) {
  __kmp_dispatch_init_hierarchy<kmp_int32>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
}
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
                                             kmp_uint32 ub, kmp_int32 st) {
  __kmp_dispatch_init_hierarchy<kmp_uint32>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
}
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
                                            kmp_int64 ub, kmp_int64 st) {
  __kmp_dispatch_init_hierarchy<kmp_int64>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
}
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
                                             kmp_uint64 ub, kmp_int64 st) {
  __kmp_dispatch_init_hierarchy<kmp_uint64>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
}

// free all the hierarchy scheduling memory associated with the team
void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
  // When the team can only ever have one thread, only 2 dispatch buffers are
  // allocated; otherwise the full set of __kmp_dispatch_num_buffers.
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  for (int i = 0; i < num_disp_buff; ++i) {
    // type does not matter here so use kmp_int32
    auto sh =
        reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
            &team->t.t_disp_buffer[i]);
    if (sh->hier) {
      sh->hier->deallocate();
      __kmp_free(sh->hier);
    }
  }
}
#endif

// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
//
// Per-thread initialization of dynamic loop dispatch: selects this thread's
// private and the team's shared dispatch buffers, waits until the shared
// buffer is free of previous loops, then delegates schedule-specific setup to
// __kmp_dispatch_init_algorithm(). Also wires up the ordered enter/exit
// callbacks and emits ITT/OMPT instrumentation where enabled.
// Note: push_ws is not referenced anywhere in this routine.
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;

  int active;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<T> volatile *sh;

  // The templated views must overlay the untyped structs exactly, since the
  // same storage is reinterpret_cast between them below.
  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized; // serialized region needs no shared buffer
  th->th.th_ident = loc;

  // Any half-decent optimizer will remove this test when the blocks are empty
  // since the macros expand to nothing
  // when statistics are disabled.
  if (schedule == __kmp_static) {
    KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  } else {
    KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC);
  }

#if KMP_USE_HIER_SCHED
  // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable
  // Hierarchical scheduling does not work with ordered, so if ordered is
  // detected, then revert back to threaded scheduling.
  bool ordered;
  enum sched_type my_sched = schedule;
  my_buffer_index = th->th.th_dispatch->th_disp_index;
  pr = reinterpret_cast<dispatch_private_info_template<T> *>(
      &th->th.th_dispatch
           ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
  // Strip modifier bits, then map nonmonotonic schedule codes back into the
  // base kmp_sch_ range before testing the ordered bit.
  my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
  if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
    my_sched =
        (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
  ordered = (kmp_ord_lower & my_sched);
  if (pr->flags.use_hier) {
    if (ordered) {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "
                     "Disabling hierarchical scheduling.\n",
                     gtid));
      pr->flags.use_hier = FALSE;
    }
  }
  if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
    // Don't use hierarchical for ordered parallel loops and don't
    // use the runtime hierarchy if one was specified in the program
    if (!ordered && !pr->flags.use_hier)
      __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
  }
#endif // KMP_USE_HIER_SCHED

#if USE_ITT_BUILD
  kmp_uint64 cur_chunk = chunk;
  // Metadata is reported only by the primary thread of a level-1 active team
  // (and only when the ITT frames mode requests it).
  int itt_need_metadata_reporting =
      __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
      KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1;
#endif
  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    // Claim the next dispatch buffer slot; buffers are reused round-robin
    // modulo __kmp_dispatch_num_buffers.
    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    /* What happens when number of threads changes, need to resize buffer? */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
                  my_buffer_index));
    if (sh->buffer_index != my_buffer_index) { // too many loops in progress?
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
                     " sh->buffer_index:%d\n",
                     gtid, my_buffer_index, sh->buffer_index));
      __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                             __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
      // Note: KMP_WAIT() cannot be used there: buffer index and
      // my_buffer_index are *always* 32-bit integers.
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                     "sh->buffer_index:%d\n",
                     gtid, my_buffer_index, sh->buffer_index));
    }
  }

  // Schedule-specific setup of the private dispatch record (trip count,
  // chunking parameters, flags, etc.).
  __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
#if USE_ITT_BUILD
                                &cur_chunk,
#endif
                                chunk, (T)th->th.th_team_nproc,
                                (T)th->th.th_info.ds.ds_tid);
  if (active) {
    // Install the ordered enter/exit callbacks: error stubs for unordered
    // loops, the real typed implementations otherwise.
    if (pr->flags.ordered == 0) {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
    }
    th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
#if USE_ITT_BUILD
    if (pr->flags.ordered) {
      __kmp_itt_ordered_init(gtid);
    }
    // Report loop metadata
    if (itt_need_metadata_reporting) {
      // Only report metadata by primary thread of active team at level 1
      kmp_uint64 schedtype = 0;
      switch (schedule) {
      case kmp_sch_static_chunked:
      case kmp_sch_static_balanced: // Chunk is calculated in the switch above
        break;
      case kmp_sch_static_greedy:
        cur_chunk = pr->u.p.parm1;
        break;
      case kmp_sch_dynamic_chunked:
        schedtype = 1;
        break;
      case kmp_sch_guided_iterative_chunked:
      case kmp_sch_guided_analytical_chunked:
      case kmp_sch_guided_simd:
        schedtype = 2;
        break;
      default:
        // Should we put this case under "static"?
        // case kmp_sch_static_steal:
        schedtype = 3;
        break;
      }
      __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
    }
#if KMP_USE_HIER_SCHED
    if (pr->flags.use_hier) {
      // Hierarchical scheduling maintains its own state; zero the flat fields.
      pr->u.p.count = 0;
      pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
    }
#endif // KMP_USER_HIER_SCHED
#endif /* USE_ITT_BUILD */
  }

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
        "lb:%%%s ub:%%%s"
        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
                  pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
                  pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
                  pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
    __kmp_str_free(&buff);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Fire the OMPT work-begin callback for tools, if one is registered.
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_get_work_schedule(pr->schedule), ompt_scope_begin,
        &(team_info->parallel_data), &(task_info->task_data), pr->u.p.tc,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
}

/* For ordered loops, either __kmp_dispatch_finish() should be called after
 * every iteration, or __kmp_dispatch_finish_chunk() should be called after
 * every chunk of iterations.
   If the ordered section(s) were not executed
 * for this iteration (or every iteration in this chunk), we need to set the
 * ordered iteration counters so that the next thread can proceed. */
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  // A serialized team has no cross-thread ordering to advance; nothing to do.
  if (!th->th.th_team->t.t_serialized) {

    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      // The shared ordered counter was already advanced for this iteration
      // (presumably by the ordered-section exit path); just clear the flag.
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      // Ordered section was not executed for this iteration: wait for our
      // turn, then bump the shared counter so the next thread can proceed.
      UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      // Spin until the shared ordered_iteration reaches this thread's
      // ordered lower bound.
      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
      KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      // Atomically advance the shared counter by one iteration.
      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}

#ifdef KMP_GOMP_COMPAT

template <typename UT>
// Chunk-granularity analogue of the per-iteration finish routine (GOMP
// compatibility path): called after each chunk of an ordered loop. If some
// (or all) iterations of the chunk did not execute their ordered section,
// advance the shared ordered counter on their behalf so successors proceed.
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  // Nothing to do for a serialized team.
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1; // iterations in this chunk (inclusive bounds)

    if (pr->ordered_bumped == inc) {
      // Every iteration of the chunk already advanced the shared counter;
      // just clear the per-thread flag.
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      // Only account for iterations that have not bumped the counter yet.
      inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      // Wait until the shared counter reaches this chunk's lower bound.
      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KMP_MB(); /* is this necessary? */
      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;
//!!!!! TODO check if the inc should be unsigned, or signed???
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      // Advance the shared counter by all remaining iterations at once.
      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
    // }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}

#endif /* KMP_GOMP_COMPAT */

template <typename T>
int __kmp_dispatch_next_algorithm(int gtid,
                                  dispatch_private_info_template<T> *pr,
                                  dispatch_shared_info_template<T> volatile *sh,
                                  kmp_int32 *p_last, T *p_lb, T *p_ub,
                                  typename traits_t<T>::signed_t *p_st, T nproc,
                                  T tid) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;
  int status = 0;
  bool last =
false; 13340b57cec5SDimitry Andric T start; 13350b57cec5SDimitry Andric ST incr; 13360b57cec5SDimitry Andric UT limit, trip, init; 13370b57cec5SDimitry Andric kmp_info_t *th = __kmp_threads[gtid]; 13380b57cec5SDimitry Andric kmp_team_t *team = th->th.th_team; 13390b57cec5SDimitry Andric 13400b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th->th.th_dispatch == 13410b57cec5SDimitry Andric &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); 13420b57cec5SDimitry Andric KMP_DEBUG_ASSERT(pr); 13430b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sh); 13440b57cec5SDimitry Andric KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc); 13450b57cec5SDimitry Andric #ifdef KMP_DEBUG 13460b57cec5SDimitry Andric { 13470b57cec5SDimitry Andric char *buff; 13480b57cec5SDimitry Andric // create format specifiers before the debug output 13490b57cec5SDimitry Andric buff = 13500b57cec5SDimitry Andric __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p " 13510b57cec5SDimitry Andric "sh:%%p nproc:%%%s tid:%%%s\n", 13520b57cec5SDimitry Andric traits_t<T>::spec, traits_t<T>::spec); 13530b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid)); 13540b57cec5SDimitry Andric __kmp_str_free(&buff); 13550b57cec5SDimitry Andric } 13560b57cec5SDimitry Andric #endif 13570b57cec5SDimitry Andric 13580b57cec5SDimitry Andric // zero trip count 13590b57cec5SDimitry Andric if (pr->u.p.tc == 0) { 13600b57cec5SDimitry Andric KD_TRACE(10, 13610b57cec5SDimitry Andric ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is " 13620b57cec5SDimitry Andric "zero status:%d\n", 13630b57cec5SDimitry Andric gtid, status)); 13640b57cec5SDimitry Andric return 0; 13650b57cec5SDimitry Andric } 13660b57cec5SDimitry Andric 13670b57cec5SDimitry Andric switch (pr->schedule) { 1368fe6060f1SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 13690b57cec5SDimitry Andric case kmp_sch_static_steal: { 13700b57cec5SDimitry Andric T chunk = pr->u.p.parm1; 1371fe6060f1SDimitry Andric UT nchunks = pr->u.p.parm2; 
13720b57cec5SDimitry Andric KD_TRACE(100, 13730b57cec5SDimitry Andric ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n", 13740b57cec5SDimitry Andric gtid)); 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric trip = pr->u.p.tc - 1; 13770b57cec5SDimitry Andric 13780b57cec5SDimitry Andric if (traits_t<T>::type_size > 4) { 1379fe6060f1SDimitry Andric // use lock for 8-byte induction variable. 1380fe6060f1SDimitry Andric // TODO (optional): check presence and use 16-byte CAS 1381fe6060f1SDimitry Andric kmp_lock_t *lck = pr->u.p.steal_lock; 13820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(lck != NULL); 13830b57cec5SDimitry Andric if (pr->u.p.count < (UT)pr->u.p.ub) { 1384fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(pr->steal_flag == READY); 13850b57cec5SDimitry Andric __kmp_acquire_lock(lck, gtid); 13860b57cec5SDimitry Andric // try to get own chunk of iterations 13870b57cec5SDimitry Andric init = (pr->u.p.count)++; 13880b57cec5SDimitry Andric status = (init < (UT)pr->u.p.ub); 13890b57cec5SDimitry Andric __kmp_release_lock(lck, gtid); 13900b57cec5SDimitry Andric } else { 13910b57cec5SDimitry Andric status = 0; // no own chunks 13920b57cec5SDimitry Andric } 13930b57cec5SDimitry Andric if (!status) { // try to steal 1394fe6060f1SDimitry Andric kmp_lock_t *lckv; // victim buffer's lock 1395e8d8bef9SDimitry Andric T while_limit = pr->u.p.parm3; 1396e8d8bef9SDimitry Andric T while_index = 0; 13975ffd83dbSDimitry Andric int idx = (th->th.th_dispatch->th_disp_index - 1) % 13985ffd83dbSDimitry Andric __kmp_dispatch_num_buffers; // current loop index 13995ffd83dbSDimitry Andric // note: victim thread can potentially execute another loop 1400fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF); // mark self buffer inactive 14010b57cec5SDimitry Andric while ((!status) && (while_limit != ++while_index)) { 1402fe6060f1SDimitry Andric dispatch_private_info_template<T> *v; 14030b57cec5SDimitry Andric T remaining; 1404fe6060f1SDimitry Andric T victimId = 
pr->u.p.parm4; 1405fe6060f1SDimitry Andric T oldVictimId = victimId ? victimId - 1 : nproc - 1; 1406fe6060f1SDimitry Andric v = reinterpret_cast<dispatch_private_info_template<T> *>( 1407fe6060f1SDimitry Andric &team->t.t_dispatch[victimId].th_disp_buffer[idx]); 1408fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(v); 1409fe6060f1SDimitry Andric while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) && 1410fe6060f1SDimitry Andric oldVictimId != victimId) { 1411fe6060f1SDimitry Andric victimId = (victimId + 1) % nproc; 1412fe6060f1SDimitry Andric v = reinterpret_cast<dispatch_private_info_template<T> *>( 1413fe6060f1SDimitry Andric &team->t.t_dispatch[victimId].th_disp_buffer[idx]); 1414fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(v); 14150b57cec5SDimitry Andric } 1416fe6060f1SDimitry Andric if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) { 14170b57cec5SDimitry Andric continue; // try once more (nproc attempts in total) 14180b57cec5SDimitry Andric } 1419fe6060f1SDimitry Andric if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) { 1420fe6060f1SDimitry Andric kmp_uint32 old = UNUSED; 1421fe6060f1SDimitry Andric // try to steal whole range from inactive victim 1422fe6060f1SDimitry Andric status = v->steal_flag.compare_exchange_strong(old, THIEF); 1423fe6060f1SDimitry Andric if (status) { 1424fe6060f1SDimitry Andric // initialize self buffer with victim's whole range of chunks 1425fe6060f1SDimitry Andric T id = victimId; 14265f757f3fSDimitry Andric T small_chunk = 0, extras = 0, p_extra = 0; 14275f757f3fSDimitry Andric __kmp_initialize_self_buffer<T>(team, id, pr, nchunks, nproc, 14285f757f3fSDimitry Andric init, small_chunk, extras, 14295f757f3fSDimitry Andric p_extra); 1430fe6060f1SDimitry Andric __kmp_acquire_lock(lck, gtid); 1431fe6060f1SDimitry Andric pr->u.p.count = init + 1; // exclude one we execute immediately 14325f757f3fSDimitry Andric pr->u.p.ub = init + small_chunk + p_extra + (id < extras ? 
1 : 0); 1433fe6060f1SDimitry Andric __kmp_release_lock(lck, gtid); 1434fe6060f1SDimitry Andric pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid 1435fe6060f1SDimitry Andric // no need to reinitialize other thread invariants: lb, st, etc. 1436fe6060f1SDimitry Andric #ifdef KMP_DEBUG 1437fe6060f1SDimitry Andric { 1438fe6060f1SDimitry Andric char *buff; 1439fe6060f1SDimitry Andric // create format specifiers before the debug output 14405f757f3fSDimitry Andric buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d " 14415f757f3fSDimitry Andric "stolen chunks from T#%%d, " 1442fe6060f1SDimitry Andric "count:%%%s ub:%%%s\n", 1443fe6060f1SDimitry Andric traits_t<UT>::spec, traits_t<T>::spec); 1444fe6060f1SDimitry Andric KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub)); 1445fe6060f1SDimitry Andric __kmp_str_free(&buff); 1446fe6060f1SDimitry Andric } 1447fe6060f1SDimitry Andric #endif 1448fe6060f1SDimitry Andric // activate non-empty buffer and let others steal from us 1449fe6060f1SDimitry Andric if (pr->u.p.count < (UT)pr->u.p.ub) 1450fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, READY); 1451fe6060f1SDimitry Andric break; 1452fe6060f1SDimitry Andric } 1453fe6060f1SDimitry Andric } 1454bdd1243dSDimitry Andric if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY || 1455fe6060f1SDimitry Andric v->u.p.count >= (UT)v->u.p.ub) { 1456fe6060f1SDimitry Andric pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim tid 1457fe6060f1SDimitry Andric continue; // no chunks to steal, try next victim 1458fe6060f1SDimitry Andric } 1459fe6060f1SDimitry Andric lckv = v->u.p.steal_lock; 1460fe6060f1SDimitry Andric KMP_ASSERT(lckv != NULL); 1461fe6060f1SDimitry Andric __kmp_acquire_lock(lckv, gtid); 1462fe6060f1SDimitry Andric limit = v->u.p.ub; // keep initial ub 1463fe6060f1SDimitry Andric if (v->u.p.count >= limit) { 1464fe6060f1SDimitry Andric __kmp_release_lock(lckv, gtid); 1465fe6060f1SDimitry Andric pr->u.p.parm4 = (victimId + 1) % nproc; // shift 
start victim tid 1466fe6060f1SDimitry Andric continue; // no chunks to steal, try next victim 14670b57cec5SDimitry Andric } 14680b57cec5SDimitry Andric 1469fe6060f1SDimitry Andric // stealing succeded, reduce victim's ub by 1/4 of undone chunks 1470fe6060f1SDimitry Andric // TODO: is this heuristics good enough?? 1471fe6060f1SDimitry Andric remaining = limit - v->u.p.count; 1472fe6060f1SDimitry Andric if (remaining > 7) { 14730b57cec5SDimitry Andric // steal 1/4 of remaining 14740b57cec5SDimitry Andric KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2); 1475fe6060f1SDimitry Andric init = (v->u.p.ub -= (remaining >> 2)); 14760b57cec5SDimitry Andric } else { 1477fe6060f1SDimitry Andric // steal 1 chunk of 1..7 remaining 14780b57cec5SDimitry Andric KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1); 1479fe6060f1SDimitry Andric init = (v->u.p.ub -= 1); 14800b57cec5SDimitry Andric } 1481fe6060f1SDimitry Andric __kmp_release_lock(lckv, gtid); 1482fe6060f1SDimitry Andric #ifdef KMP_DEBUG 1483fe6060f1SDimitry Andric { 1484fe6060f1SDimitry Andric char *buff; 1485fe6060f1SDimitry Andric // create format specifiers before the debug output 1486fe6060f1SDimitry Andric buff = __kmp_str_format( 1487fe6060f1SDimitry Andric "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, " 1488fe6060f1SDimitry Andric "count:%%%s ub:%%%s\n", 1489fe6060f1SDimitry Andric traits_t<UT>::spec, traits_t<UT>::spec); 1490fe6060f1SDimitry Andric KD_TRACE(10, (buff, gtid, victimId, init, limit)); 1491fe6060f1SDimitry Andric __kmp_str_free(&buff); 1492fe6060f1SDimitry Andric } 1493fe6060f1SDimitry Andric #endif 14940b57cec5SDimitry Andric KMP_DEBUG_ASSERT(init + 1 <= limit); 1495fe6060f1SDimitry Andric pr->u.p.parm4 = victimId; // remember victim to steal from 14960b57cec5SDimitry Andric status = 1; 1497fe6060f1SDimitry Andric // now update own count and ub with stolen range excluding init chunk 1498fe6060f1SDimitry Andric __kmp_acquire_lock(lck, gtid); 14990b57cec5SDimitry Andric 
pr->u.p.count = init + 1; 15000b57cec5SDimitry Andric pr->u.p.ub = limit; 1501fe6060f1SDimitry Andric __kmp_release_lock(lck, gtid); 1502fe6060f1SDimitry Andric // activate non-empty buffer and let others steal from us 1503fe6060f1SDimitry Andric if (init + 1 < limit) 1504fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, READY); 15050b57cec5SDimitry Andric } // while (search for victim) 15060b57cec5SDimitry Andric } // if (try to find victim and steal) 15070b57cec5SDimitry Andric } else { 15080b57cec5SDimitry Andric // 4-byte induction variable, use 8-byte CAS for pair (count, ub) 1509fe6060f1SDimitry Andric // as all operations on pair (count, ub) must be done atomically 15100b57cec5SDimitry Andric typedef union { 15110b57cec5SDimitry Andric struct { 15120b57cec5SDimitry Andric UT count; 15130b57cec5SDimitry Andric T ub; 15140b57cec5SDimitry Andric } p; 15150b57cec5SDimitry Andric kmp_int64 b; 15160b57cec5SDimitry Andric } union_i4; 15170b57cec5SDimitry Andric union_i4 vold, vnew; 1518fe6060f1SDimitry Andric if (pr->u.p.count < (UT)pr->u.p.ub) { 1519fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(pr->steal_flag == READY); 15200b57cec5SDimitry Andric vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); 1521fe6060f1SDimitry Andric vnew.b = vold.b; 1522fe6060f1SDimitry Andric vnew.p.count++; // get chunk from head of self range 1523fe6060f1SDimitry Andric while (!KMP_COMPARE_AND_STORE_REL64( 15240b57cec5SDimitry Andric (volatile kmp_int64 *)&pr->u.p.count, 15250b57cec5SDimitry Andric *VOLATILE_CAST(kmp_int64 *) & vold.b, 15260b57cec5SDimitry Andric *VOLATILE_CAST(kmp_int64 *) & vnew.b)) { 15270b57cec5SDimitry Andric KMP_CPU_PAUSE(); 15280b57cec5SDimitry Andric vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); 1529fe6060f1SDimitry Andric vnew.b = vold.b; 15300b57cec5SDimitry Andric vnew.p.count++; 15310b57cec5SDimitry Andric } 1532fe6060f1SDimitry Andric init = vold.p.count; 1533fe6060f1SDimitry Andric status = (init < (UT)vold.p.ub); 1534fe6060f1SDimitry Andric } else 
{ 1535fe6060f1SDimitry Andric status = 0; // no own chunks 15360b57cec5SDimitry Andric } 1537fe6060f1SDimitry Andric if (!status) { // try to steal 1538e8d8bef9SDimitry Andric T while_limit = pr->u.p.parm3; 1539e8d8bef9SDimitry Andric T while_index = 0; 15405ffd83dbSDimitry Andric int idx = (th->th.th_dispatch->th_disp_index - 1) % 15415ffd83dbSDimitry Andric __kmp_dispatch_num_buffers; // current loop index 15425ffd83dbSDimitry Andric // note: victim thread can potentially execute another loop 1543fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF); // mark self buffer inactive 15440b57cec5SDimitry Andric while ((!status) && (while_limit != ++while_index)) { 1545fe6060f1SDimitry Andric dispatch_private_info_template<T> *v; 1546e8d8bef9SDimitry Andric T remaining; 1547fe6060f1SDimitry Andric T victimId = pr->u.p.parm4; 1548fe6060f1SDimitry Andric T oldVictimId = victimId ? victimId - 1 : nproc - 1; 1549fe6060f1SDimitry Andric v = reinterpret_cast<dispatch_private_info_template<T> *>( 1550fe6060f1SDimitry Andric &team->t.t_dispatch[victimId].th_disp_buffer[idx]); 1551fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(v); 1552fe6060f1SDimitry Andric while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) && 1553fe6060f1SDimitry Andric oldVictimId != victimId) { 1554fe6060f1SDimitry Andric victimId = (victimId + 1) % nproc; 1555fe6060f1SDimitry Andric v = reinterpret_cast<dispatch_private_info_template<T> *>( 1556fe6060f1SDimitry Andric &team->t.t_dispatch[victimId].th_disp_buffer[idx]); 1557fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(v); 15580b57cec5SDimitry Andric } 1559fe6060f1SDimitry Andric if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) { 15600b57cec5SDimitry Andric continue; // try once more (nproc attempts in total) 15610b57cec5SDimitry Andric } 1562fe6060f1SDimitry Andric if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) { 1563fe6060f1SDimitry Andric kmp_uint32 old = UNUSED; 1564fe6060f1SDimitry Andric // try to steal whole range from 
inactive victim 1565fe6060f1SDimitry Andric status = v->steal_flag.compare_exchange_strong(old, THIEF); 1566fe6060f1SDimitry Andric if (status) { 1567fe6060f1SDimitry Andric // initialize self buffer with victim's whole range of chunks 1568fe6060f1SDimitry Andric T id = victimId; 15695f757f3fSDimitry Andric T small_chunk = 0, extras = 0, p_extra = 0; 15705f757f3fSDimitry Andric __kmp_initialize_self_buffer<T>(team, id, pr, nchunks, nproc, 15715f757f3fSDimitry Andric init, small_chunk, extras, 15725f757f3fSDimitry Andric p_extra); 1573fe6060f1SDimitry Andric vnew.p.count = init + 1; 15745f757f3fSDimitry Andric vnew.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0); 1575fe6060f1SDimitry Andric // write pair (count, ub) at once atomically 1576fe6060f1SDimitry Andric #if KMP_ARCH_X86 1577fe6060f1SDimitry Andric KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vnew.b); 1578fe6060f1SDimitry Andric #else 1579fe6060f1SDimitry Andric *(volatile kmp_int64 *)(&pr->u.p.count) = vnew.b; 1580fe6060f1SDimitry Andric #endif 1581fe6060f1SDimitry Andric pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid 1582fe6060f1SDimitry Andric // no need to initialize other thread invariants: lb, st, etc. 
1583fe6060f1SDimitry Andric #ifdef KMP_DEBUG 1584fe6060f1SDimitry Andric { 1585fe6060f1SDimitry Andric char *buff; 1586fe6060f1SDimitry Andric // create format specifiers before the debug output 15875f757f3fSDimitry Andric buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d " 15885f757f3fSDimitry Andric "stolen chunks from T#%%d, " 1589fe6060f1SDimitry Andric "count:%%%s ub:%%%s\n", 1590fe6060f1SDimitry Andric traits_t<UT>::spec, traits_t<T>::spec); 1591fe6060f1SDimitry Andric KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub)); 1592fe6060f1SDimitry Andric __kmp_str_free(&buff); 15930b57cec5SDimitry Andric } 1594fe6060f1SDimitry Andric #endif 1595fe6060f1SDimitry Andric // activate non-empty buffer and let others steal from us 1596fe6060f1SDimitry Andric if (pr->u.p.count < (UT)pr->u.p.ub) 1597fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, READY); 1598fe6060f1SDimitry Andric break; 1599fe6060f1SDimitry Andric } 1600fe6060f1SDimitry Andric } 1601fe6060f1SDimitry Andric while (1) { // CAS loop with check if victim still has enough chunks 1602fe6060f1SDimitry Andric // many threads may be stealing concurrently from same victim 1603fe6060f1SDimitry Andric vold.b = *(volatile kmp_int64 *)(&v->u.p.count); 1604fe6060f1SDimitry Andric if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY || 1605fe6060f1SDimitry Andric vold.p.count >= (UT)vold.p.ub) { 1606fe6060f1SDimitry Andric pr->u.p.parm4 = (victimId + 1) % nproc; // shift start victim id 1607fe6060f1SDimitry Andric break; // no chunks to steal, try next victim 1608fe6060f1SDimitry Andric } 1609fe6060f1SDimitry Andric vnew.b = vold.b; 1610fe6060f1SDimitry Andric remaining = vold.p.ub - vold.p.count; 1611e8d8bef9SDimitry Andric // try to steal 1/4 of remaining 1612fe6060f1SDimitry Andric // TODO: is this heuristics good enough?? 
1613fe6060f1SDimitry Andric if (remaining > 7) { 1614fe6060f1SDimitry Andric vnew.p.ub -= remaining >> 2; // steal from tail of victim's range 16150b57cec5SDimitry Andric } else { 1616fe6060f1SDimitry Andric vnew.p.ub -= 1; // steal 1 chunk of 1..7 remaining 16170b57cec5SDimitry Andric } 1618fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(vnew.p.ub * (UT)chunk <= trip); 1619fe6060f1SDimitry Andric if (KMP_COMPARE_AND_STORE_REL64( 1620fe6060f1SDimitry Andric (volatile kmp_int64 *)&v->u.p.count, 16210b57cec5SDimitry Andric *VOLATILE_CAST(kmp_int64 *) & vold.b, 16220b57cec5SDimitry Andric *VOLATILE_CAST(kmp_int64 *) & vnew.b)) { 1623fe6060f1SDimitry Andric // stealing succedded 1624fe6060f1SDimitry Andric #ifdef KMP_DEBUG 1625fe6060f1SDimitry Andric { 1626fe6060f1SDimitry Andric char *buff; 1627fe6060f1SDimitry Andric // create format specifiers before the debug output 1628fe6060f1SDimitry Andric buff = __kmp_str_format( 1629fe6060f1SDimitry Andric "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, " 1630fe6060f1SDimitry Andric "count:%%%s ub:%%%s\n", 1631fe6060f1SDimitry Andric traits_t<T>::spec, traits_t<T>::spec); 1632fe6060f1SDimitry Andric KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub)); 1633fe6060f1SDimitry Andric __kmp_str_free(&buff); 1634fe6060f1SDimitry Andric } 1635fe6060f1SDimitry Andric #endif 16360b57cec5SDimitry Andric KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 16370b57cec5SDimitry Andric vold.p.ub - vnew.p.ub); 16380b57cec5SDimitry Andric status = 1; 1639fe6060f1SDimitry Andric pr->u.p.parm4 = victimId; // keep victim id 16400b57cec5SDimitry Andric // now update own count and ub 16410b57cec5SDimitry Andric init = vnew.p.ub; 16420b57cec5SDimitry Andric vold.p.count = init + 1; 16430b57cec5SDimitry Andric #if KMP_ARCH_X86 16440b57cec5SDimitry Andric KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b); 16450b57cec5SDimitry Andric #else 16460b57cec5SDimitry Andric *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b; 
16470b57cec5SDimitry Andric #endif 1648fe6060f1SDimitry Andric // activate non-empty buffer and let others steal from us 1649fe6060f1SDimitry Andric if (vold.p.count < (UT)vold.p.ub) 1650fe6060f1SDimitry Andric KMP_ATOMIC_ST_REL(&pr->steal_flag, READY); 16510b57cec5SDimitry Andric break; 16520b57cec5SDimitry Andric } // if (check CAS result) 16535ffd83dbSDimitry Andric KMP_CPU_PAUSE(); // CAS failed, repeatedly attempt 16540b57cec5SDimitry Andric } // while (try to steal from particular victim) 16550b57cec5SDimitry Andric } // while (search for victim) 16560b57cec5SDimitry Andric } // if (try to find victim and steal) 16570b57cec5SDimitry Andric } // if (4-byte induction variable) 16580b57cec5SDimitry Andric if (!status) { 16590b57cec5SDimitry Andric *p_lb = 0; 16600b57cec5SDimitry Andric *p_ub = 0; 16610b57cec5SDimitry Andric if (p_st != NULL) 16620b57cec5SDimitry Andric *p_st = 0; 16630b57cec5SDimitry Andric } else { 1664fe6060f1SDimitry Andric start = pr->u.p.lb; 16650b57cec5SDimitry Andric init *= chunk; 16660b57cec5SDimitry Andric limit = chunk + init - 1; 16670b57cec5SDimitry Andric incr = pr->u.p.st; 16680b57cec5SDimitry Andric KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1); 16690b57cec5SDimitry Andric 16700b57cec5SDimitry Andric KMP_DEBUG_ASSERT(init <= trip); 1671fe6060f1SDimitry Andric // keep track of done chunks for possible early exit from stealing 1672fe6060f1SDimitry Andric // TODO: count executed chunks locally with rare update of shared location 1673fe6060f1SDimitry Andric // test_then_inc<ST>((volatile ST *)&sh->u.s.iteration); 16740b57cec5SDimitry Andric if ((last = (limit >= trip)) != 0) 16750b57cec5SDimitry Andric limit = trip; 16760b57cec5SDimitry Andric if (p_st != NULL) 16770b57cec5SDimitry Andric *p_st = incr; 16780b57cec5SDimitry Andric 16790b57cec5SDimitry Andric if (incr == 1) { 16800b57cec5SDimitry Andric *p_lb = start + init; 16810b57cec5SDimitry Andric *p_ub = start + limit; 16820b57cec5SDimitry Andric } else { 
16830b57cec5SDimitry Andric *p_lb = start + init * incr; 16840b57cec5SDimitry Andric *p_ub = start + limit * incr; 16850b57cec5SDimitry Andric } 16860b57cec5SDimitry Andric } // if 16870b57cec5SDimitry Andric break; 16880b57cec5SDimitry Andric } // case 1689fe6060f1SDimitry Andric #endif // KMP_STATIC_STEAL_ENABLED 16900b57cec5SDimitry Andric case kmp_sch_static_balanced: { 16910b57cec5SDimitry Andric KD_TRACE( 16920b57cec5SDimitry Andric 10, 16930b57cec5SDimitry Andric ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n", 16940b57cec5SDimitry Andric gtid)); 16950b57cec5SDimitry Andric /* check if thread has any iteration to do */ 16960b57cec5SDimitry Andric if ((status = !pr->u.p.count) != 0) { 16970b57cec5SDimitry Andric pr->u.p.count = 1; 16980b57cec5SDimitry Andric *p_lb = pr->u.p.lb; 16990b57cec5SDimitry Andric *p_ub = pr->u.p.ub; 1700e8d8bef9SDimitry Andric last = (pr->u.p.parm1 != 0); 17010b57cec5SDimitry Andric if (p_st != NULL) 17020b57cec5SDimitry Andric *p_st = pr->u.p.st; 17030b57cec5SDimitry Andric } else { /* no iterations to do */ 17040b57cec5SDimitry Andric pr->u.p.lb = pr->u.p.ub + pr->u.p.st; 17050b57cec5SDimitry Andric } 17060b57cec5SDimitry Andric } // case 17070b57cec5SDimitry Andric break; 17080b57cec5SDimitry Andric case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was 17090b57cec5SDimitry Andric merged here */ 17100b57cec5SDimitry Andric case kmp_sch_static_chunked: { 17110b57cec5SDimitry Andric T parm1; 17120b57cec5SDimitry Andric 17130b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d " 17140b57cec5SDimitry Andric "kmp_sch_static_[affinity|chunked] case\n", 17150b57cec5SDimitry Andric gtid)); 17160b57cec5SDimitry Andric parm1 = pr->u.p.parm1; 17170b57cec5SDimitry Andric 17180b57cec5SDimitry Andric trip = pr->u.p.tc - 1; 17190b57cec5SDimitry Andric init = parm1 * (pr->u.p.count + tid); 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric if ((status = (init <= trip)) != 0) { 
17220b57cec5SDimitry Andric start = pr->u.p.lb; 17230b57cec5SDimitry Andric incr = pr->u.p.st; 17240b57cec5SDimitry Andric limit = parm1 + init - 1; 17250b57cec5SDimitry Andric 17260b57cec5SDimitry Andric if ((last = (limit >= trip)) != 0) 17270b57cec5SDimitry Andric limit = trip; 17280b57cec5SDimitry Andric 17290b57cec5SDimitry Andric if (p_st != NULL) 17300b57cec5SDimitry Andric *p_st = incr; 17310b57cec5SDimitry Andric 17320b57cec5SDimitry Andric pr->u.p.count += nproc; 17330b57cec5SDimitry Andric 17340b57cec5SDimitry Andric if (incr == 1) { 17350b57cec5SDimitry Andric *p_lb = start + init; 17360b57cec5SDimitry Andric *p_ub = start + limit; 17370b57cec5SDimitry Andric } else { 17380b57cec5SDimitry Andric *p_lb = start + init * incr; 17390b57cec5SDimitry Andric *p_ub = start + limit * incr; 17400b57cec5SDimitry Andric } 17410b57cec5SDimitry Andric 17420b57cec5SDimitry Andric if (pr->flags.ordered) { 17430b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 17440b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 17450b57cec5SDimitry Andric } // if 17460b57cec5SDimitry Andric } // if 17470b57cec5SDimitry Andric } // case 17480b57cec5SDimitry Andric break; 17490b57cec5SDimitry Andric 17500b57cec5SDimitry Andric case kmp_sch_dynamic_chunked: { 1751fe6060f1SDimitry Andric UT chunk_number; 1752fe6060f1SDimitry Andric UT chunk_size = pr->u.p.parm1; 1753fe6060f1SDimitry Andric UT nchunks = pr->u.p.parm2; 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric KD_TRACE( 17560b57cec5SDimitry Andric 100, 17570b57cec5SDimitry Andric ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n", 17580b57cec5SDimitry Andric gtid)); 17590b57cec5SDimitry Andric 1760fe6060f1SDimitry Andric chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration); 1761fe6060f1SDimitry Andric status = (chunk_number < nchunks); 1762fe6060f1SDimitry Andric if (!status) { 17630b57cec5SDimitry Andric *p_lb = 0; 17640b57cec5SDimitry Andric *p_ub = 0; 17650b57cec5SDimitry Andric 
if (p_st != NULL) 17660b57cec5SDimitry Andric *p_st = 0; 17670b57cec5SDimitry Andric } else { 1768fe6060f1SDimitry Andric init = chunk_size * chunk_number; 1769fe6060f1SDimitry Andric trip = pr->u.p.tc - 1; 17700b57cec5SDimitry Andric start = pr->u.p.lb; 17710b57cec5SDimitry Andric incr = pr->u.p.st; 17720b57cec5SDimitry Andric 1773fe6060f1SDimitry Andric if ((last = (trip - init < (UT)chunk_size))) 17740b57cec5SDimitry Andric limit = trip; 1775fe6060f1SDimitry Andric else 1776fe6060f1SDimitry Andric limit = chunk_size + init - 1; 17770b57cec5SDimitry Andric 17780b57cec5SDimitry Andric if (p_st != NULL) 17790b57cec5SDimitry Andric *p_st = incr; 17800b57cec5SDimitry Andric 17810b57cec5SDimitry Andric if (incr == 1) { 17820b57cec5SDimitry Andric *p_lb = start + init; 17830b57cec5SDimitry Andric *p_ub = start + limit; 17840b57cec5SDimitry Andric } else { 17850b57cec5SDimitry Andric *p_lb = start + init * incr; 17860b57cec5SDimitry Andric *p_ub = start + limit * incr; 17870b57cec5SDimitry Andric } 17880b57cec5SDimitry Andric 17890b57cec5SDimitry Andric if (pr->flags.ordered) { 17900b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 17910b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 17920b57cec5SDimitry Andric } // if 17930b57cec5SDimitry Andric } // if 17940b57cec5SDimitry Andric } // case 17950b57cec5SDimitry Andric break; 17960b57cec5SDimitry Andric 17970b57cec5SDimitry Andric case kmp_sch_guided_iterative_chunked: { 17980b57cec5SDimitry Andric T chunkspec = pr->u.p.parm1; 17990b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked " 18000b57cec5SDimitry Andric "iterative case\n", 18010b57cec5SDimitry Andric gtid)); 18020b57cec5SDimitry Andric trip = pr->u.p.tc; 18030b57cec5SDimitry Andric // Start atomic part of calculations 18040b57cec5SDimitry Andric while (1) { 18050b57cec5SDimitry Andric ST remaining; // signed, because can be < 0 18060b57cec5SDimitry Andric init = sh->u.s.iteration; // shared value 
18070b57cec5SDimitry Andric remaining = trip - init; 18080b57cec5SDimitry Andric if (remaining <= 0) { // AC: need to compare with 0 first 18090b57cec5SDimitry Andric // nothing to do, don't try atomic op 18100b57cec5SDimitry Andric status = 0; 18110b57cec5SDimitry Andric break; 18120b57cec5SDimitry Andric } 18130b57cec5SDimitry Andric if ((T)remaining < 18140b57cec5SDimitry Andric pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default 18155ffd83dbSDimitry Andric // use dynamic-style schedule 1816480093f4SDimitry Andric // atomically increment iterations, get old value 18170b57cec5SDimitry Andric init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration), 18180b57cec5SDimitry Andric (ST)chunkspec); 18190b57cec5SDimitry Andric remaining = trip - init; 18200b57cec5SDimitry Andric if (remaining <= 0) { 18210b57cec5SDimitry Andric status = 0; // all iterations got by other threads 18220b57cec5SDimitry Andric } else { 18230b57cec5SDimitry Andric // got some iterations to work on 18240b57cec5SDimitry Andric status = 1; 18250b57cec5SDimitry Andric if ((T)remaining > chunkspec) { 18260b57cec5SDimitry Andric limit = init + chunkspec - 1; 18270b57cec5SDimitry Andric } else { 1828e8d8bef9SDimitry Andric last = true; // the last chunk 18290b57cec5SDimitry Andric limit = init + remaining - 1; 18300b57cec5SDimitry Andric } // if 18310b57cec5SDimitry Andric } // if 18320b57cec5SDimitry Andric break; 18330b57cec5SDimitry Andric } // if 1834e8d8bef9SDimitry Andric limit = init + (UT)((double)remaining * 1835e8d8bef9SDimitry Andric *(double *)&pr->u.p.parm3); // divide by K*nproc 18360b57cec5SDimitry Andric if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration), 18370b57cec5SDimitry Andric (ST)init, (ST)limit)) { 18380b57cec5SDimitry Andric // CAS was successful, chunk obtained 18390b57cec5SDimitry Andric status = 1; 18400b57cec5SDimitry Andric --limit; 18410b57cec5SDimitry Andric break; 18420b57cec5SDimitry Andric } // if 18430b57cec5SDimitry Andric } 
// while 18440b57cec5SDimitry Andric if (status != 0) { 18450b57cec5SDimitry Andric start = pr->u.p.lb; 18460b57cec5SDimitry Andric incr = pr->u.p.st; 18470b57cec5SDimitry Andric if (p_st != NULL) 18480b57cec5SDimitry Andric *p_st = incr; 18490b57cec5SDimitry Andric *p_lb = start + init * incr; 18500b57cec5SDimitry Andric *p_ub = start + limit * incr; 18510b57cec5SDimitry Andric if (pr->flags.ordered) { 18520b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 18530b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 18540b57cec5SDimitry Andric } // if 18550b57cec5SDimitry Andric } else { 18560b57cec5SDimitry Andric *p_lb = 0; 18570b57cec5SDimitry Andric *p_ub = 0; 18580b57cec5SDimitry Andric if (p_st != NULL) 18590b57cec5SDimitry Andric *p_st = 0; 18600b57cec5SDimitry Andric } // if 18610b57cec5SDimitry Andric } // case 18620b57cec5SDimitry Andric break; 18630b57cec5SDimitry Andric 18640b57cec5SDimitry Andric case kmp_sch_guided_simd: { 18650b57cec5SDimitry Andric // same as iterative but curr-chunk adjusted to be multiple of given 18660b57cec5SDimitry Andric // chunk 18670b57cec5SDimitry Andric T chunk = pr->u.p.parm1; 18680b57cec5SDimitry Andric KD_TRACE(100, 18690b57cec5SDimitry Andric ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n", 18700b57cec5SDimitry Andric gtid)); 18710b57cec5SDimitry Andric trip = pr->u.p.tc; 18720b57cec5SDimitry Andric // Start atomic part of calculations 18730b57cec5SDimitry Andric while (1) { 18740b57cec5SDimitry Andric ST remaining; // signed, because can be < 0 18750b57cec5SDimitry Andric init = sh->u.s.iteration; // shared value 18760b57cec5SDimitry Andric remaining = trip - init; 18770b57cec5SDimitry Andric if (remaining <= 0) { // AC: need to compare with 0 first 18780b57cec5SDimitry Andric status = 0; // nothing to do, don't try atomic op 18790b57cec5SDimitry Andric break; 18800b57cec5SDimitry Andric } 1881349cc55cSDimitry Andric KMP_DEBUG_ASSERT(chunk && init % chunk == 0); 18820b57cec5SDimitry Andric // 
compare with K*nproc*(chunk+1), K=2 by default 18830b57cec5SDimitry Andric if ((T)remaining < pr->u.p.parm2) { 18845ffd83dbSDimitry Andric // use dynamic-style schedule 1885480093f4SDimitry Andric // atomically increment iterations, get old value 18860b57cec5SDimitry Andric init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration), 18870b57cec5SDimitry Andric (ST)chunk); 18880b57cec5SDimitry Andric remaining = trip - init; 18890b57cec5SDimitry Andric if (remaining <= 0) { 18900b57cec5SDimitry Andric status = 0; // all iterations got by other threads 18910b57cec5SDimitry Andric } else { 18920b57cec5SDimitry Andric // got some iterations to work on 18930b57cec5SDimitry Andric status = 1; 18940b57cec5SDimitry Andric if ((T)remaining > chunk) { 18950b57cec5SDimitry Andric limit = init + chunk - 1; 18960b57cec5SDimitry Andric } else { 1897e8d8bef9SDimitry Andric last = true; // the last chunk 18980b57cec5SDimitry Andric limit = init + remaining - 1; 18990b57cec5SDimitry Andric } // if 19000b57cec5SDimitry Andric } // if 19010b57cec5SDimitry Andric break; 19020b57cec5SDimitry Andric } // if 19030b57cec5SDimitry Andric // divide by K*nproc 1904e8d8bef9SDimitry Andric UT span; 1905e8d8bef9SDimitry Andric __kmp_type_convert((double)remaining * (*(double *)&pr->u.p.parm3), 1906e8d8bef9SDimitry Andric &span); 19070b57cec5SDimitry Andric UT rem = span % chunk; 19080b57cec5SDimitry Andric if (rem) // adjust so that span%chunk == 0 19090b57cec5SDimitry Andric span += chunk - rem; 19100b57cec5SDimitry Andric limit = init + span; 19110b57cec5SDimitry Andric if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration), 19120b57cec5SDimitry Andric (ST)init, (ST)limit)) { 19130b57cec5SDimitry Andric // CAS was successful, chunk obtained 19140b57cec5SDimitry Andric status = 1; 19150b57cec5SDimitry Andric --limit; 19160b57cec5SDimitry Andric break; 19170b57cec5SDimitry Andric } // if 19180b57cec5SDimitry Andric } // while 19190b57cec5SDimitry Andric if (status != 0) { 
19200b57cec5SDimitry Andric start = pr->u.p.lb; 19210b57cec5SDimitry Andric incr = pr->u.p.st; 19220b57cec5SDimitry Andric if (p_st != NULL) 19230b57cec5SDimitry Andric *p_st = incr; 19240b57cec5SDimitry Andric *p_lb = start + init * incr; 19250b57cec5SDimitry Andric *p_ub = start + limit * incr; 19260b57cec5SDimitry Andric if (pr->flags.ordered) { 19270b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 19280b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 19290b57cec5SDimitry Andric } // if 19300b57cec5SDimitry Andric } else { 19310b57cec5SDimitry Andric *p_lb = 0; 19320b57cec5SDimitry Andric *p_ub = 0; 19330b57cec5SDimitry Andric if (p_st != NULL) 19340b57cec5SDimitry Andric *p_st = 0; 19350b57cec5SDimitry Andric } // if 19360b57cec5SDimitry Andric } // case 19370b57cec5SDimitry Andric break; 19380b57cec5SDimitry Andric 19390b57cec5SDimitry Andric case kmp_sch_guided_analytical_chunked: { 19400b57cec5SDimitry Andric T chunkspec = pr->u.p.parm1; 19410b57cec5SDimitry Andric UT chunkIdx; 19420b57cec5SDimitry Andric #if KMP_USE_X87CONTROL 19430b57cec5SDimitry Andric /* for storing original FPCW value for Windows* OS on 19440b57cec5SDimitry Andric IA-32 architecture 8-byte version */ 19450b57cec5SDimitry Andric unsigned int oldFpcw; 19460b57cec5SDimitry Andric unsigned int fpcwSet = 0; 19470b57cec5SDimitry Andric #endif 19480b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d " 19490b57cec5SDimitry Andric "kmp_sch_guided_analytical_chunked case\n", 19500b57cec5SDimitry Andric gtid)); 19510b57cec5SDimitry Andric 19520b57cec5SDimitry Andric trip = pr->u.p.tc; 19530b57cec5SDimitry Andric 19540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(nproc > 1); 19550b57cec5SDimitry Andric KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip); 19560b57cec5SDimitry Andric 19570b57cec5SDimitry Andric while (1) { /* this while loop is a safeguard against unexpected zero 19580b57cec5SDimitry Andric chunk sizes */ 19590b57cec5SDimitry Andric chunkIdx = 
test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration); 19600b57cec5SDimitry Andric if (chunkIdx >= (UT)pr->u.p.parm2) { 19610b57cec5SDimitry Andric --trip; 19620b57cec5SDimitry Andric /* use dynamic-style scheduling */ 19630b57cec5SDimitry Andric init = chunkIdx * chunkspec + pr->u.p.count; 19640b57cec5SDimitry Andric /* need to verify init > 0 in case of overflow in the above 19650b57cec5SDimitry Andric * calculation */ 19660b57cec5SDimitry Andric if ((status = (init > 0 && init <= trip)) != 0) { 19670b57cec5SDimitry Andric limit = init + chunkspec - 1; 19680b57cec5SDimitry Andric 19690b57cec5SDimitry Andric if ((last = (limit >= trip)) != 0) 19700b57cec5SDimitry Andric limit = trip; 19710b57cec5SDimitry Andric } 19720b57cec5SDimitry Andric break; 19730b57cec5SDimitry Andric } else { 19740b57cec5SDimitry Andric /* use exponential-style scheduling */ 19750b57cec5SDimitry Andric /* The following check is to workaround the lack of long double precision on 19760b57cec5SDimitry Andric Windows* OS. 19770b57cec5SDimitry Andric This check works around the possible effect that init != 0 for chunkIdx == 0. 
19780b57cec5SDimitry Andric */ 19790b57cec5SDimitry Andric #if KMP_USE_X87CONTROL 19800b57cec5SDimitry Andric /* If we haven't already done so, save original 19810b57cec5SDimitry Andric FPCW and set precision to 64-bit, as Windows* OS 19820b57cec5SDimitry Andric on IA-32 architecture defaults to 53-bit */ 19830b57cec5SDimitry Andric if (!fpcwSet) { 19840b57cec5SDimitry Andric oldFpcw = _control87(0, 0); 19850b57cec5SDimitry Andric _control87(_PC_64, _MCW_PC); 19860b57cec5SDimitry Andric fpcwSet = 0x30000; 19870b57cec5SDimitry Andric } 19880b57cec5SDimitry Andric #endif 19890b57cec5SDimitry Andric if (chunkIdx) { 19900b57cec5SDimitry Andric init = __kmp_dispatch_guided_remaining<T>( 19910b57cec5SDimitry Andric trip, *(DBL *)&pr->u.p.parm3, chunkIdx); 19920b57cec5SDimitry Andric KMP_DEBUG_ASSERT(init); 19930b57cec5SDimitry Andric init = trip - init; 19940b57cec5SDimitry Andric } else 19950b57cec5SDimitry Andric init = 0; 19960b57cec5SDimitry Andric limit = trip - __kmp_dispatch_guided_remaining<T>( 19970b57cec5SDimitry Andric trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1); 19980b57cec5SDimitry Andric KMP_ASSERT(init <= limit); 19990b57cec5SDimitry Andric if (init < limit) { 20000b57cec5SDimitry Andric KMP_DEBUG_ASSERT(limit <= trip); 20010b57cec5SDimitry Andric --limit; 20020b57cec5SDimitry Andric status = 1; 20030b57cec5SDimitry Andric break; 20040b57cec5SDimitry Andric } // if 20050b57cec5SDimitry Andric } // if 20060b57cec5SDimitry Andric } // while (1) 20070b57cec5SDimitry Andric #if KMP_USE_X87CONTROL 20080b57cec5SDimitry Andric /* restore FPCW if necessary 20090b57cec5SDimitry Andric AC: check fpcwSet flag first because oldFpcw can be uninitialized here 20100b57cec5SDimitry Andric */ 20110b57cec5SDimitry Andric if (fpcwSet && (oldFpcw & fpcwSet)) 20120b57cec5SDimitry Andric _control87(oldFpcw, _MCW_PC); 20130b57cec5SDimitry Andric #endif 20140b57cec5SDimitry Andric if (status != 0) { 20150b57cec5SDimitry Andric start = pr->u.p.lb; 20160b57cec5SDimitry Andric incr 
= pr->u.p.st; 20170b57cec5SDimitry Andric if (p_st != NULL) 20180b57cec5SDimitry Andric *p_st = incr; 20190b57cec5SDimitry Andric *p_lb = start + init * incr; 20200b57cec5SDimitry Andric *p_ub = start + limit * incr; 20210b57cec5SDimitry Andric if (pr->flags.ordered) { 20220b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 20230b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 20240b57cec5SDimitry Andric } 20250b57cec5SDimitry Andric } else { 20260b57cec5SDimitry Andric *p_lb = 0; 20270b57cec5SDimitry Andric *p_ub = 0; 20280b57cec5SDimitry Andric if (p_st != NULL) 20290b57cec5SDimitry Andric *p_st = 0; 20300b57cec5SDimitry Andric } 20310b57cec5SDimitry Andric } // case 20320b57cec5SDimitry Andric break; 20330b57cec5SDimitry Andric 20340b57cec5SDimitry Andric case kmp_sch_trapezoidal: { 20350b57cec5SDimitry Andric UT index; 20360b57cec5SDimitry Andric T parm2 = pr->u.p.parm2; 20370b57cec5SDimitry Andric T parm3 = pr->u.p.parm3; 20380b57cec5SDimitry Andric T parm4 = pr->u.p.parm4; 20390b57cec5SDimitry Andric KD_TRACE(100, 20400b57cec5SDimitry Andric ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n", 20410b57cec5SDimitry Andric gtid)); 20420b57cec5SDimitry Andric 20430b57cec5SDimitry Andric index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration); 20440b57cec5SDimitry Andric 20450b57cec5SDimitry Andric init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2; 20460b57cec5SDimitry Andric trip = pr->u.p.tc - 1; 20470b57cec5SDimitry Andric 20480b57cec5SDimitry Andric if ((status = ((T)index < parm3 && init <= trip)) == 0) { 20490b57cec5SDimitry Andric *p_lb = 0; 20500b57cec5SDimitry Andric *p_ub = 0; 20510b57cec5SDimitry Andric if (p_st != NULL) 20520b57cec5SDimitry Andric *p_st = 0; 20530b57cec5SDimitry Andric } else { 20540b57cec5SDimitry Andric start = pr->u.p.lb; 20550b57cec5SDimitry Andric limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1; 20560b57cec5SDimitry Andric incr = pr->u.p.st; 20570b57cec5SDimitry Andric 
20580b57cec5SDimitry Andric if ((last = (limit >= trip)) != 0) 20590b57cec5SDimitry Andric limit = trip; 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric if (p_st != NULL) 20620b57cec5SDimitry Andric *p_st = incr; 20630b57cec5SDimitry Andric 20640b57cec5SDimitry Andric if (incr == 1) { 20650b57cec5SDimitry Andric *p_lb = start + init; 20660b57cec5SDimitry Andric *p_ub = start + limit; 20670b57cec5SDimitry Andric } else { 20680b57cec5SDimitry Andric *p_lb = start + init * incr; 20690b57cec5SDimitry Andric *p_ub = start + limit * incr; 20700b57cec5SDimitry Andric } 20710b57cec5SDimitry Andric 20720b57cec5SDimitry Andric if (pr->flags.ordered) { 20730b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 20740b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 20750b57cec5SDimitry Andric } // if 20760b57cec5SDimitry Andric } // if 20770b57cec5SDimitry Andric } // case 20780b57cec5SDimitry Andric break; 20790b57cec5SDimitry Andric default: { 20800b57cec5SDimitry Andric status = 0; // to avoid complaints on uninitialized variable use 20810b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message 20820b57cec5SDimitry Andric KMP_HNT(GetNewerLibrary), // Hint 20830b57cec5SDimitry Andric __kmp_msg_null // Variadic argument list terminator 20840b57cec5SDimitry Andric ); 20850b57cec5SDimitry Andric } break; 20860b57cec5SDimitry Andric } // switch 20870b57cec5SDimitry Andric if (p_last) 20880b57cec5SDimitry Andric *p_last = last; 20890b57cec5SDimitry Andric #ifdef KMP_DEBUG 20900b57cec5SDimitry Andric if (pr->flags.ordered) { 20910b57cec5SDimitry Andric char *buff; 20920b57cec5SDimitry Andric // create format specifiers before the debug output 20930b57cec5SDimitry Andric buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d " 20940b57cec5SDimitry Andric "ordered_lower:%%%s ordered_upper:%%%s\n", 20950b57cec5SDimitry Andric traits_t<UT>::spec, traits_t<UT>::spec); 20960b57cec5SDimitry Andric KD_TRACE(1000, (buff, gtid, 
pr->u.p.ordered_lower, pr->u.p.ordered_upper)); 20970b57cec5SDimitry Andric __kmp_str_free(&buff); 20980b57cec5SDimitry Andric } 20990b57cec5SDimitry Andric { 21000b57cec5SDimitry Andric char *buff; 21010b57cec5SDimitry Andric // create format specifiers before the debug output 21020b57cec5SDimitry Andric buff = __kmp_str_format( 21030b57cec5SDimitry Andric "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d " 21040b57cec5SDimitry Andric "p_lb:%%%s p_ub:%%%s p_st:%%%s\n", 21050b57cec5SDimitry Andric traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); 2106fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(p_last); 2107fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(p_st); 21080b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st)); 21090b57cec5SDimitry Andric __kmp_str_free(&buff); 21100b57cec5SDimitry Andric } 21110b57cec5SDimitry Andric #endif 21120b57cec5SDimitry Andric return status; 21130b57cec5SDimitry Andric } 21140b57cec5SDimitry Andric 21150b57cec5SDimitry Andric /* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more 21160b57cec5SDimitry Andric work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini() 21170b57cec5SDimitry Andric is not called. 
*/ 21180b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 21190b57cec5SDimitry Andric #define OMPT_LOOP_END \ 21200b57cec5SDimitry Andric if (status == 0) { \ 21210b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_work) { \ 21220b57cec5SDimitry Andric ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ 21230b57cec5SDimitry Andric ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \ 21240b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_work)( \ 2125*0fca6ea1SDimitry Andric ompt_get_work_schedule(pr->schedule), ompt_scope_end, \ 2126*0fca6ea1SDimitry Andric &(team_info->parallel_data), &(task_info->task_data), 0, codeptr); \ 21270b57cec5SDimitry Andric } \ 21280b57cec5SDimitry Andric } 212981ad6265SDimitry Andric #define OMPT_LOOP_DISPATCH(lb, ub, st, status) \ 213081ad6265SDimitry Andric if (ompt_enabled.ompt_callback_dispatch && status) { \ 213181ad6265SDimitry Andric ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ 213281ad6265SDimitry Andric ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \ 213381ad6265SDimitry Andric ompt_dispatch_chunk_t chunk; \ 213481ad6265SDimitry Andric ompt_data_t instance = ompt_data_none; \ 213581ad6265SDimitry Andric OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st); \ 213681ad6265SDimitry Andric instance.ptr = &chunk; \ 213781ad6265SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_dispatch)( \ 213881ad6265SDimitry Andric &(team_info->parallel_data), &(task_info->task_data), \ 213981ad6265SDimitry Andric ompt_dispatch_ws_loop_chunk, instance); \ 214081ad6265SDimitry Andric } 21410b57cec5SDimitry Andric // TODO: implement count 21420b57cec5SDimitry Andric #else 21430b57cec5SDimitry Andric #define OMPT_LOOP_END // no-op 214461cfbce3SDimitry Andric #define OMPT_LOOP_DISPATCH(lb, ub, st, status) // no-op 21450b57cec5SDimitry Andric #endif 21460b57cec5SDimitry Andric 21470b57cec5SDimitry Andric #if KMP_STATS_ENABLED 21480b57cec5SDimitry Andric #define 
KMP_STATS_LOOP_END \ 21490b57cec5SDimitry Andric { \ 21500b57cec5SDimitry Andric kmp_int64 u, l, t, i; \ 21510b57cec5SDimitry Andric l = (kmp_int64)(*p_lb); \ 21520b57cec5SDimitry Andric u = (kmp_int64)(*p_ub); \ 21530b57cec5SDimitry Andric i = (kmp_int64)(pr->u.p.st); \ 21540b57cec5SDimitry Andric if (status == 0) { \ 21550b57cec5SDimitry Andric t = 0; \ 21560b57cec5SDimitry Andric KMP_POP_PARTITIONED_TIMER(); \ 21570b57cec5SDimitry Andric } else if (i == 1) { \ 21580b57cec5SDimitry Andric if (u >= l) \ 21590b57cec5SDimitry Andric t = u - l + 1; \ 21600b57cec5SDimitry Andric else \ 21610b57cec5SDimitry Andric t = 0; \ 21620b57cec5SDimitry Andric } else if (i < 0) { \ 21630b57cec5SDimitry Andric if (l >= u) \ 21640b57cec5SDimitry Andric t = (l - u) / (-i) + 1; \ 21650b57cec5SDimitry Andric else \ 21660b57cec5SDimitry Andric t = 0; \ 21670b57cec5SDimitry Andric } else { \ 21680b57cec5SDimitry Andric if (u >= l) \ 21690b57cec5SDimitry Andric t = (u - l) / i + 1; \ 21700b57cec5SDimitry Andric else \ 21710b57cec5SDimitry Andric t = 0; \ 21720b57cec5SDimitry Andric } \ 21730b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t); \ 21740b57cec5SDimitry Andric } 21750b57cec5SDimitry Andric #else 21760b57cec5SDimitry Andric #define KMP_STATS_LOOP_END /* Nothing */ 21770b57cec5SDimitry Andric #endif 21780b57cec5SDimitry Andric 21790b57cec5SDimitry Andric template <typename T> 21800b57cec5SDimitry Andric static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, 21810b57cec5SDimitry Andric T *p_lb, T *p_ub, 21820b57cec5SDimitry Andric typename traits_t<T>::signed_t *p_st 21830b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 21840b57cec5SDimitry Andric , 21850b57cec5SDimitry Andric void *codeptr 21860b57cec5SDimitry Andric #endif 21870b57cec5SDimitry Andric ) { 21880b57cec5SDimitry Andric 21890b57cec5SDimitry Andric typedef typename traits_t<T>::unsigned_t UT; 21900b57cec5SDimitry Andric typedef typename traits_t<T>::signed_t ST; 
21910b57cec5SDimitry Andric // This is potentially slightly misleading, schedule(runtime) will appear here 21925ffd83dbSDimitry Andric // even if the actual runtime schedule is static. (Which points out a 2193480093f4SDimitry Andric // disadvantage of schedule(runtime): even when static scheduling is used it 21940b57cec5SDimitry Andric // costs more than a compile time choice to use static scheduling would.) 21950b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling); 21960b57cec5SDimitry Andric 21970b57cec5SDimitry Andric int status; 21980b57cec5SDimitry Andric dispatch_private_info_template<T> *pr; 2199e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 22000b57cec5SDimitry Andric kmp_info_t *th = __kmp_threads[gtid]; 22010b57cec5SDimitry Andric kmp_team_t *team = th->th.th_team; 22020b57cec5SDimitry Andric 22030b57cec5SDimitry Andric KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL 22040b57cec5SDimitry Andric KD_TRACE( 22050b57cec5SDimitry Andric 1000, 22060b57cec5SDimitry Andric ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n", 22070b57cec5SDimitry Andric gtid, p_lb, p_ub, p_st, p_last)); 22080b57cec5SDimitry Andric 22090b57cec5SDimitry Andric if (team->t.t_serialized) { 22105ffd83dbSDimitry Andric /* NOTE: serialize this dispatch because we are not at the active level */ 22110b57cec5SDimitry Andric pr = reinterpret_cast<dispatch_private_info_template<T> *>( 22120b57cec5SDimitry Andric th->th.th_dispatch->th_disp_buffer); /* top of the stack */ 22130b57cec5SDimitry Andric KMP_DEBUG_ASSERT(pr); 22140b57cec5SDimitry Andric 22150b57cec5SDimitry Andric if ((status = (pr->u.p.tc != 0)) == 0) { 22160b57cec5SDimitry Andric *p_lb = 0; 22170b57cec5SDimitry Andric *p_ub = 0; 22180b57cec5SDimitry Andric // if ( p_last != NULL ) 22190b57cec5SDimitry Andric // *p_last = 0; 22200b57cec5SDimitry Andric if (p_st != NULL) 22210b57cec5SDimitry Andric *p_st = 0; 22220b57cec5SDimitry Andric if 
(__kmp_env_consistency_check) { 22230b57cec5SDimitry Andric if (pr->pushed_ws != ct_none) { 22240b57cec5SDimitry Andric pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); 22250b57cec5SDimitry Andric } 22260b57cec5SDimitry Andric } 22270b57cec5SDimitry Andric } else if (pr->flags.nomerge) { 22280b57cec5SDimitry Andric kmp_int32 last; 22290b57cec5SDimitry Andric T start; 22300b57cec5SDimitry Andric UT limit, trip, init; 22310b57cec5SDimitry Andric ST incr; 22320b57cec5SDimitry Andric T chunk = pr->u.p.parm1; 22330b57cec5SDimitry Andric 22340b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", 22350b57cec5SDimitry Andric gtid)); 22360b57cec5SDimitry Andric 22370b57cec5SDimitry Andric init = chunk * pr->u.p.count++; 22380b57cec5SDimitry Andric trip = pr->u.p.tc - 1; 22390b57cec5SDimitry Andric 22400b57cec5SDimitry Andric if ((status = (init <= trip)) == 0) { 22410b57cec5SDimitry Andric *p_lb = 0; 22420b57cec5SDimitry Andric *p_ub = 0; 22430b57cec5SDimitry Andric // if ( p_last != NULL ) 22440b57cec5SDimitry Andric // *p_last = 0; 22450b57cec5SDimitry Andric if (p_st != NULL) 22460b57cec5SDimitry Andric *p_st = 0; 22470b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 22480b57cec5SDimitry Andric if (pr->pushed_ws != ct_none) { 22490b57cec5SDimitry Andric pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); 22500b57cec5SDimitry Andric } 22510b57cec5SDimitry Andric } 22520b57cec5SDimitry Andric } else { 22530b57cec5SDimitry Andric start = pr->u.p.lb; 22540b57cec5SDimitry Andric limit = chunk + init - 1; 22550b57cec5SDimitry Andric incr = pr->u.p.st; 22560b57cec5SDimitry Andric 22570b57cec5SDimitry Andric if ((last = (limit >= trip)) != 0) { 22580b57cec5SDimitry Andric limit = trip; 22590b57cec5SDimitry Andric #if KMP_OS_WINDOWS 22600b57cec5SDimitry Andric pr->u.p.last_upper = pr->u.p.ub; 22610b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 22620b57cec5SDimitry Andric } 22630b57cec5SDimitry 
Andric if (p_last != NULL) 22640b57cec5SDimitry Andric *p_last = last; 22650b57cec5SDimitry Andric if (p_st != NULL) 22660b57cec5SDimitry Andric *p_st = incr; 22670b57cec5SDimitry Andric if (incr == 1) { 22680b57cec5SDimitry Andric *p_lb = start + init; 22690b57cec5SDimitry Andric *p_ub = start + limit; 22700b57cec5SDimitry Andric } else { 22710b57cec5SDimitry Andric *p_lb = start + init * incr; 22720b57cec5SDimitry Andric *p_ub = start + limit * incr; 22730b57cec5SDimitry Andric } 22740b57cec5SDimitry Andric 22750b57cec5SDimitry Andric if (pr->flags.ordered) { 22760b57cec5SDimitry Andric pr->u.p.ordered_lower = init; 22770b57cec5SDimitry Andric pr->u.p.ordered_upper = limit; 22780b57cec5SDimitry Andric #ifdef KMP_DEBUG 22790b57cec5SDimitry Andric { 22800b57cec5SDimitry Andric char *buff; 22810b57cec5SDimitry Andric // create format specifiers before the debug output 22820b57cec5SDimitry Andric buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " 22830b57cec5SDimitry Andric "ordered_lower:%%%s ordered_upper:%%%s\n", 22840b57cec5SDimitry Andric traits_t<UT>::spec, traits_t<UT>::spec); 22850b57cec5SDimitry Andric KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, 22860b57cec5SDimitry Andric pr->u.p.ordered_upper)); 22870b57cec5SDimitry Andric __kmp_str_free(&buff); 22880b57cec5SDimitry Andric } 22890b57cec5SDimitry Andric #endif 22900b57cec5SDimitry Andric } // if 22910b57cec5SDimitry Andric } // if 22920b57cec5SDimitry Andric } else { 22930b57cec5SDimitry Andric pr->u.p.tc = 0; 22940b57cec5SDimitry Andric *p_lb = pr->u.p.lb; 22950b57cec5SDimitry Andric *p_ub = pr->u.p.ub; 22960b57cec5SDimitry Andric #if KMP_OS_WINDOWS 22970b57cec5SDimitry Andric pr->u.p.last_upper = *p_ub; 22980b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 22990b57cec5SDimitry Andric if (p_last != NULL) 23000b57cec5SDimitry Andric *p_last = TRUE; 23010b57cec5SDimitry Andric if (p_st != NULL) 23020b57cec5SDimitry Andric *p_st = pr->u.p.st; 23030b57cec5SDimitry Andric } // if 
23040b57cec5SDimitry Andric #ifdef KMP_DEBUG 23050b57cec5SDimitry Andric { 23060b57cec5SDimitry Andric char *buff; 23070b57cec5SDimitry Andric // create format specifiers before the debug output 23080b57cec5SDimitry Andric buff = __kmp_str_format( 23090b57cec5SDimitry Andric "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " 23100b57cec5SDimitry Andric "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", 23110b57cec5SDimitry Andric traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); 2312e8d8bef9SDimitry Andric KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, 2313e8d8bef9SDimitry Andric (p_last ? *p_last : 0), status)); 23140b57cec5SDimitry Andric __kmp_str_free(&buff); 23150b57cec5SDimitry Andric } 23160b57cec5SDimitry Andric #endif 23170b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS 23180b57cec5SDimitry Andric SSC_MARK_DISPATCH_NEXT(); 23190b57cec5SDimitry Andric #endif 232081ad6265SDimitry Andric OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status); 23210b57cec5SDimitry Andric OMPT_LOOP_END; 23220b57cec5SDimitry Andric KMP_STATS_LOOP_END; 23230b57cec5SDimitry Andric return status; 23240b57cec5SDimitry Andric } else { 23250b57cec5SDimitry Andric kmp_int32 last = 0; 23260b57cec5SDimitry Andric dispatch_shared_info_template<T> volatile *sh; 23270b57cec5SDimitry Andric 23280b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th->th.th_dispatch == 23290b57cec5SDimitry Andric &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); 23300b57cec5SDimitry Andric 23310b57cec5SDimitry Andric pr = reinterpret_cast<dispatch_private_info_template<T> *>( 23320b57cec5SDimitry Andric th->th.th_dispatch->th_dispatch_pr_current); 23330b57cec5SDimitry Andric KMP_DEBUG_ASSERT(pr); 23340b57cec5SDimitry Andric sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>( 23350b57cec5SDimitry Andric th->th.th_dispatch->th_dispatch_sh_current); 23360b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sh); 23370b57cec5SDimitry Andric 23380b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 
23390b57cec5SDimitry Andric if (pr->flags.use_hier) 23400b57cec5SDimitry Andric status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st); 23410b57cec5SDimitry Andric else 23420b57cec5SDimitry Andric #endif // KMP_USE_HIER_SCHED 23430b57cec5SDimitry Andric status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub, 23440b57cec5SDimitry Andric p_st, th->th.th_team_nproc, 23450b57cec5SDimitry Andric th->th.th_info.ds.ds_tid); 23460b57cec5SDimitry Andric // status == 0: no more iterations to execute 23470b57cec5SDimitry Andric if (status == 0) { 2348fe6060f1SDimitry Andric ST num_done; 2349fe6060f1SDimitry Andric num_done = test_then_inc<ST>(&sh->u.s.num_done); 23500b57cec5SDimitry Andric #ifdef KMP_DEBUG 23510b57cec5SDimitry Andric { 23520b57cec5SDimitry Andric char *buff; 23530b57cec5SDimitry Andric // create format specifiers before the debug output 23540b57cec5SDimitry Andric buff = __kmp_str_format( 23550b57cec5SDimitry Andric "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", 2356fe6060f1SDimitry Andric traits_t<ST>::spec); 23570b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, sh->u.s.num_done)); 23580b57cec5SDimitry Andric __kmp_str_free(&buff); 23590b57cec5SDimitry Andric } 23600b57cec5SDimitry Andric #endif 23610b57cec5SDimitry Andric 23620b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 23630b57cec5SDimitry Andric pr->flags.use_hier = FALSE; 23640b57cec5SDimitry Andric #endif 2365fe6060f1SDimitry Andric if (num_done == th->th.th_team_nproc - 1) { 2366fe6060f1SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 2367fe6060f1SDimitry Andric if (pr->schedule == kmp_sch_static_steal) { 23680b57cec5SDimitry Andric int i; 23695ffd83dbSDimitry Andric int idx = (th->th.th_dispatch->th_disp_index - 1) % 23705ffd83dbSDimitry Andric __kmp_dispatch_num_buffers; // current loop index 23710b57cec5SDimitry Andric // loop complete, safe to destroy locks used for stealing 23720b57cec5SDimitry Andric for (i = 0; i < th->th.th_team_nproc; ++i) { 
23735ffd83dbSDimitry Andric dispatch_private_info_template<T> *buf = 23745ffd83dbSDimitry Andric reinterpret_cast<dispatch_private_info_template<T> *>( 2375fe6060f1SDimitry Andric &team->t.t_dispatch[i].th_disp_buffer[idx]); 2376fe6060f1SDimitry Andric KMP_ASSERT(buf->steal_flag == THIEF); // buffer must be inactive 2377fe6060f1SDimitry Andric KMP_ATOMIC_ST_RLX(&buf->steal_flag, UNUSED); 2378fe6060f1SDimitry Andric if (traits_t<T>::type_size > 4) { 2379fe6060f1SDimitry Andric // destroy locks used for stealing 2380fe6060f1SDimitry Andric kmp_lock_t *lck = buf->u.p.steal_lock; 23810b57cec5SDimitry Andric KMP_ASSERT(lck != NULL); 23820b57cec5SDimitry Andric __kmp_destroy_lock(lck); 23830b57cec5SDimitry Andric __kmp_free(lck); 2384fe6060f1SDimitry Andric buf->u.p.steal_lock = NULL; 2385fe6060f1SDimitry Andric } 23860b57cec5SDimitry Andric } 23870b57cec5SDimitry Andric } 23880b57cec5SDimitry Andric #endif 2389fe6060f1SDimitry Andric /* NOTE: release shared buffer to be reused */ 23900b57cec5SDimitry Andric 23910b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 23920b57cec5SDimitry Andric 23930b57cec5SDimitry Andric sh->u.s.num_done = 0; 23940b57cec5SDimitry Andric sh->u.s.iteration = 0; 23950b57cec5SDimitry Andric 23960b57cec5SDimitry Andric /* TODO replace with general release procedure? */ 23970b57cec5SDimitry Andric if (pr->flags.ordered) { 23980b57cec5SDimitry Andric sh->u.s.ordered_iteration = 0; 23990b57cec5SDimitry Andric } 24000b57cec5SDimitry Andric 2401*0fca6ea1SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 2402*0fca6ea1SDimitry Andric 24030b57cec5SDimitry Andric sh->buffer_index += __kmp_dispatch_num_buffers; 24040b57cec5SDimitry Andric KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", 24050b57cec5SDimitry Andric gtid, sh->buffer_index)); 24060b57cec5SDimitry Andric 24070b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 24080b57cec5SDimitry Andric 24090b57cec5SDimitry Andric } // if 24100b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 24110b57cec5SDimitry Andric if (pr->pushed_ws != ct_none) { 24120b57cec5SDimitry Andric pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); 24130b57cec5SDimitry Andric } 24140b57cec5SDimitry Andric } 24150b57cec5SDimitry Andric 24160b57cec5SDimitry Andric th->th.th_dispatch->th_deo_fcn = NULL; 24170b57cec5SDimitry Andric th->th.th_dispatch->th_dxo_fcn = NULL; 24180b57cec5SDimitry Andric th->th.th_dispatch->th_dispatch_sh_current = NULL; 24190b57cec5SDimitry Andric th->th.th_dispatch->th_dispatch_pr_current = NULL; 24200b57cec5SDimitry Andric } // if (status == 0) 24210b57cec5SDimitry Andric #if KMP_OS_WINDOWS 24220b57cec5SDimitry Andric else if (last) { 24230b57cec5SDimitry Andric pr->u.p.last_upper = pr->u.p.ub; 24240b57cec5SDimitry Andric } 24250b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 24260b57cec5SDimitry Andric if (p_last != NULL && status != 0) 24270b57cec5SDimitry Andric *p_last = last; 24280b57cec5SDimitry Andric } // if 24290b57cec5SDimitry Andric 24300b57cec5SDimitry Andric #ifdef KMP_DEBUG 24310b57cec5SDimitry Andric { 24320b57cec5SDimitry Andric char *buff; 24330b57cec5SDimitry Andric // create format specifiers before the debug output 24340b57cec5SDimitry Andric buff = __kmp_str_format( 24350b57cec5SDimitry Andric "__kmp_dispatch_next: T#%%d normal case: " 24360b57cec5SDimitry Andric "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n", 24370b57cec5SDimitry Andric traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec); 24380b57cec5SDimitry Andric KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, 24390b57cec5SDimitry Andric (p_last ? 
*p_last : 0), status)); 24400b57cec5SDimitry Andric __kmp_str_free(&buff); 24410b57cec5SDimitry Andric } 24420b57cec5SDimitry Andric #endif 24430b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS 24440b57cec5SDimitry Andric SSC_MARK_DISPATCH_NEXT(); 24450b57cec5SDimitry Andric #endif 244681ad6265SDimitry Andric OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status); 24470b57cec5SDimitry Andric OMPT_LOOP_END; 24480b57cec5SDimitry Andric KMP_STATS_LOOP_END; 24490b57cec5SDimitry Andric return status; 24500b57cec5SDimitry Andric } 24510b57cec5SDimitry Andric 2452753f127fSDimitry Andric /*! 2453753f127fSDimitry Andric @ingroup WORK_SHARING 2454753f127fSDimitry Andric @param loc source location information 2455753f127fSDimitry Andric @param global_tid global thread number 2456753f127fSDimitry Andric @return Zero if the parallel region is not active and this thread should execute 2457753f127fSDimitry Andric all sections, non-zero otherwise. 2458753f127fSDimitry Andric 2459753f127fSDimitry Andric Beginning of sections construct. 2460753f127fSDimitry Andric There are no implicit barriers in the "sections" calls, rather the compiler 2461753f127fSDimitry Andric should introduce an explicit barrier if it is required. 
2462753f127fSDimitry Andric 2463753f127fSDimitry Andric This implementation is based on __kmp_dispatch_init, using same constructs for 2464753f127fSDimitry Andric shared data (we can't have sections nested directly in omp for loop, there 2465753f127fSDimitry Andric should be a parallel region in between) 2466753f127fSDimitry Andric */ 2467753f127fSDimitry Andric kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) { 2468753f127fSDimitry Andric 2469753f127fSDimitry Andric int active; 2470753f127fSDimitry Andric kmp_info_t *th; 2471753f127fSDimitry Andric kmp_team_t *team; 2472753f127fSDimitry Andric kmp_uint32 my_buffer_index; 2473753f127fSDimitry Andric dispatch_shared_info_template<kmp_int32> volatile *sh; 2474753f127fSDimitry Andric 2475753f127fSDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 2476753f127fSDimitry Andric 2477753f127fSDimitry Andric if (!TCR_4(__kmp_init_parallel)) 2478753f127fSDimitry Andric __kmp_parallel_initialize(); 2479753f127fSDimitry Andric __kmp_resume_if_soft_paused(); 2480753f127fSDimitry Andric 2481753f127fSDimitry Andric /* setup data */ 2482753f127fSDimitry Andric th = __kmp_threads[gtid]; 2483753f127fSDimitry Andric team = th->th.th_team; 2484753f127fSDimitry Andric active = !team->t.t_serialized; 2485753f127fSDimitry Andric th->th.th_ident = loc; 2486753f127fSDimitry Andric 2487753f127fSDimitry Andric KMP_COUNT_BLOCK(OMP_SECTIONS); 2488753f127fSDimitry Andric KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid)); 2489753f127fSDimitry Andric 2490753f127fSDimitry Andric if (active) { 2491753f127fSDimitry Andric // Setup sections in the same way as dynamic scheduled loops. 2492753f127fSDimitry Andric // We need one shared data: which section is to execute next. 
2493753f127fSDimitry Andric // (in case parallel is not active, all sections will be executed on the 2494753f127fSDimitry Andric // same thread) 2495753f127fSDimitry Andric KMP_DEBUG_ASSERT(th->th.th_dispatch == 2496753f127fSDimitry Andric &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); 2497753f127fSDimitry Andric 2498753f127fSDimitry Andric my_buffer_index = th->th.th_dispatch->th_disp_index++; 2499753f127fSDimitry Andric 2500753f127fSDimitry Andric // reuse shared data structures from dynamic sched loops: 2501753f127fSDimitry Andric sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>( 2502753f127fSDimitry Andric &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); 2503753f127fSDimitry Andric KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid, 2504753f127fSDimitry Andric my_buffer_index)); 2505753f127fSDimitry Andric 2506753f127fSDimitry Andric th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error; 2507753f127fSDimitry Andric th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error; 2508753f127fSDimitry Andric 2509753f127fSDimitry Andric KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d " 2510753f127fSDimitry Andric "sh->buffer_index:%d\n", 2511753f127fSDimitry Andric gtid, my_buffer_index, sh->buffer_index)); 2512753f127fSDimitry Andric __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index, 2513753f127fSDimitry Andric __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL)); 2514753f127fSDimitry Andric // Note: KMP_WAIT() cannot be used there: buffer index and 2515753f127fSDimitry Andric // my_buffer_index are *always* 32-bit integers. 
2516753f127fSDimitry Andric KMP_MB(); 2517753f127fSDimitry Andric KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d " 2518753f127fSDimitry Andric "sh->buffer_index:%d\n", 2519753f127fSDimitry Andric gtid, my_buffer_index, sh->buffer_index)); 2520753f127fSDimitry Andric 2521753f127fSDimitry Andric th->th.th_dispatch->th_dispatch_pr_current = 2522753f127fSDimitry Andric nullptr; // sections construct doesn't need private data 2523753f127fSDimitry Andric th->th.th_dispatch->th_dispatch_sh_current = 2524753f127fSDimitry Andric CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh); 2525753f127fSDimitry Andric } 2526753f127fSDimitry Andric 2527753f127fSDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 2528753f127fSDimitry Andric if (ompt_enabled.ompt_callback_work) { 2529753f127fSDimitry Andric ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 2530753f127fSDimitry Andric ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 2531753f127fSDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_work)( 2532753f127fSDimitry Andric ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data), 2533753f127fSDimitry Andric &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 2534753f127fSDimitry Andric } 2535753f127fSDimitry Andric #endif 2536753f127fSDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_sections); 2537753f127fSDimitry Andric 2538753f127fSDimitry Andric return active; 2539753f127fSDimitry Andric } 2540753f127fSDimitry Andric 2541753f127fSDimitry Andric /*! 2542753f127fSDimitry Andric @ingroup WORK_SHARING 2543753f127fSDimitry Andric @param loc source location information 2544753f127fSDimitry Andric @param global_tid global thread number 2545753f127fSDimitry Andric @param numberOfSections number of sections in the 'sections' construct 2546753f127fSDimitry Andric @return unsigned [from 0 to n) - number (id) of the section to execute next on 2547753f127fSDimitry Andric this thread. 
n (or any other number not in range) - nothing to execute on this 2548753f127fSDimitry Andric thread 2549753f127fSDimitry Andric */ 2550753f127fSDimitry Andric 2551753f127fSDimitry Andric kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid, 2552753f127fSDimitry Andric kmp_int32 numberOfSections) { 2553753f127fSDimitry Andric 2554bdd1243dSDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_sections_overhead); 2555753f127fSDimitry Andric 2556753f127fSDimitry Andric kmp_info_t *th = __kmp_threads[gtid]; 2557753f127fSDimitry Andric #ifdef KMP_DEBUG 2558753f127fSDimitry Andric kmp_team_t *team = th->th.th_team; 2559753f127fSDimitry Andric #endif 2560753f127fSDimitry Andric 2561753f127fSDimitry Andric KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid, 2562753f127fSDimitry Andric numberOfSections)); 2563753f127fSDimitry Andric 2564753f127fSDimitry Andric // For serialized case we should not call this function: 2565753f127fSDimitry Andric KMP_DEBUG_ASSERT(!team->t.t_serialized); 2566753f127fSDimitry Andric 2567753f127fSDimitry Andric dispatch_shared_info_template<kmp_int32> volatile *sh; 2568753f127fSDimitry Andric 2569753f127fSDimitry Andric KMP_DEBUG_ASSERT(th->th.th_dispatch == 2570753f127fSDimitry Andric &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); 2571753f127fSDimitry Andric 2572753f127fSDimitry Andric KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current)); 2573753f127fSDimitry Andric sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>( 2574753f127fSDimitry Andric th->th.th_dispatch->th_dispatch_sh_current); 2575753f127fSDimitry Andric KMP_DEBUG_ASSERT(sh); 2576753f127fSDimitry Andric 2577753f127fSDimitry Andric kmp_int32 sectionIndex = 0; 2578753f127fSDimitry Andric bool moreSectionsToExecute = true; 2579753f127fSDimitry Andric 2580753f127fSDimitry Andric // Find section to execute: 2581753f127fSDimitry Andric sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration); 
2582753f127fSDimitry Andric if (sectionIndex >= numberOfSections) { 2583753f127fSDimitry Andric moreSectionsToExecute = false; 2584753f127fSDimitry Andric } 2585753f127fSDimitry Andric 2586753f127fSDimitry Andric // status == 0: no more sections to execute; 2587753f127fSDimitry Andric // OMPTODO: __kmpc_end_sections could be bypassed? 2588753f127fSDimitry Andric if (!moreSectionsToExecute) { 2589753f127fSDimitry Andric kmp_int32 num_done; 2590753f127fSDimitry Andric 2591753f127fSDimitry Andric num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done)); 2592753f127fSDimitry Andric 2593753f127fSDimitry Andric if (num_done == th->th.th_team_nproc - 1) { 2594753f127fSDimitry Andric /* NOTE: release this buffer to be reused */ 2595753f127fSDimitry Andric 2596753f127fSDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 2597753f127fSDimitry Andric 2598753f127fSDimitry Andric sh->u.s.num_done = 0; 2599753f127fSDimitry Andric sh->u.s.iteration = 0; 2600753f127fSDimitry Andric 2601753f127fSDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 2602753f127fSDimitry Andric 2603753f127fSDimitry Andric sh->buffer_index += __kmp_dispatch_num_buffers; 2604753f127fSDimitry Andric KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid, 2605753f127fSDimitry Andric sh->buffer_index)); 2606753f127fSDimitry Andric 2607753f127fSDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 2608753f127fSDimitry Andric 2609753f127fSDimitry Andric } // if 2610753f127fSDimitry Andric 2611753f127fSDimitry Andric th->th.th_dispatch->th_deo_fcn = NULL; 2612753f127fSDimitry Andric th->th.th_dispatch->th_dxo_fcn = NULL; 2613753f127fSDimitry Andric th->th.th_dispatch->th_dispatch_sh_current = NULL; 2614753f127fSDimitry Andric th->th.th_dispatch->th_dispatch_pr_current = NULL; 2615753f127fSDimitry Andric 2616753f127fSDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 2617753f127fSDimitry Andric if (ompt_enabled.ompt_callback_dispatch) { 2618753f127fSDimitry Andric ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 2619753f127fSDimitry Andric ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 2620753f127fSDimitry Andric ompt_data_t instance = ompt_data_none; 2621753f127fSDimitry Andric instance.ptr = OMPT_GET_RETURN_ADDRESS(0); 2622753f127fSDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_dispatch)( 2623753f127fSDimitry Andric &(team_info->parallel_data), &(task_info->task_data), 2624753f127fSDimitry Andric ompt_dispatch_section, instance); 2625753f127fSDimitry Andric } 2626753f127fSDimitry Andric #endif 2627753f127fSDimitry Andric } 2628753f127fSDimitry Andric 2629753f127fSDimitry Andric return sectionIndex; 2630753f127fSDimitry Andric } 2631753f127fSDimitry Andric 2632753f127fSDimitry Andric /*! 2633753f127fSDimitry Andric @ingroup WORK_SHARING 2634753f127fSDimitry Andric @param loc source location information 2635753f127fSDimitry Andric @param global_tid global thread number 2636753f127fSDimitry Andric 2637753f127fSDimitry Andric End of "sections" construct. 2638753f127fSDimitry Andric Don't need to wait here: barrier is added separately when needed. 
*/
void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {

  kmp_info_t *th = __kmp_threads[gtid];
  int active = !th->th.th_team->t.t_serialized;

  KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));

  if (!active) {
    // In active case call finalization is done in __kmpc_next_section
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
          &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
    }
#endif
  }

  KMP_POP_PARTITIONED_TIMER();
  KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
}

// Compute this team's sub-range [*plower, *pupper] of the global iteration
// space for the distribute part of a composite construct, splitting the trip
// count across nteams teams.  *plastiter is set to 1 for the team that owns
// the last iteration.  Requires being inside a teams construct (asserted).
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
                                  typename traits_t<T>::signed_t incr) {
  typedef typename traits_t<T>::unsigned_t UT;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper);
  KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  __kmp_assert_valid_gtid(gtid);
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    // negative stride: difference taken in the positive direction to avoid
    // signed overflow, then divided by |incr|
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only some teams get single iteration, others get nothing
    if (team_id < trip_count) {
      *pupper = *plower = *plower + team_id * incr;
    } else {
      *plower = *pupper + incr; // zero-trip loop
    }
    if (plastiter != NULL)
      *plastiter = (team_id == trip_count - 1);
  } else {
    if (__kmp_static == kmp_sch_static_balanced) {
      // balanced: first (trip_count % nteams) teams get one extra iteration
      UT chunk = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunk + (team_id < extras ? team_id : extras));
      *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // greedy: every team gets ceil(trip_count / nteams) iterations; the
      // last team's range is clamped back to the original upper bound below
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupper < *plower)
          *pupper = traits_t<T>::max_value; // arithmetic wrapped: saturate
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupper > upper - incr;
        if (*pupper > upper)
          *pupper = upper; // tracker C73258
      } else {
        if (*pupper > *plower)
          *pupper = traits_t<T>::min_value; // arithmetic wrapped: saturate
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupper < upper - incr;
        if (*pupper < upper)
          *pupper = upper; // tracker C73258
      }
    }
  }
}

//-----------------------------------------------------------------------------
// Dispatch routines
// Transfer call to template< type T >
// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
//                      T lb, T ub, ST st, ST chunk )
extern "C" {

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param schedule Schedule type
@param lb Lower bound
@param ub Upper bound
@param st Step (or increment if you prefer)
@param chunk The chunk size to block with

This function prepares the runtime to start a dynamically scheduled for loop,
saving the loop arguments.
These functions are all identical apart from the types of the arguments.
*/

// 32-bit signed variant: forwards to the __kmp_dispatch_init template.
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
/*!
See @ref __kmpc_dispatch_init_4
*/
// 32-bit unsigned variant (note: stride and chunk remain signed 32-bit).
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

/*!
See @ref __kmpc_dispatch_init_4
*/
// 64-bit signed variant.
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

/*!
See @ref __kmpc_dispatch_init_4
*/
// 64-bit unsigned variant (stride and chunk remain signed 64-bit).
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

/*!
See @ref __kmpc_dispatch_init_4

Difference from __kmpc_dispatch_init set of functions is these functions
are called for composite distribute parallel for construct. Thus before
regular iterations dispatching we need to calc per-team iteration space.

These functions are all identical apart from the types of the arguments.
28580b57cec5SDimitry Andric */ 28590b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid, 28600b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 *p_last, 28610b57cec5SDimitry Andric kmp_int32 lb, kmp_int32 ub, kmp_int32 st, 28620b57cec5SDimitry Andric kmp_int32 chunk) { 28630b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28640b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 28650b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 28660b57cec5SDimitry Andric #endif 28670b57cec5SDimitry Andric __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st); 28680b57cec5SDimitry Andric __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true); 28690b57cec5SDimitry Andric } 28700b57cec5SDimitry Andric 28710b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, 28720b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 *p_last, 28730b57cec5SDimitry Andric kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, 28740b57cec5SDimitry Andric kmp_int32 chunk) { 28750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28760b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 28770b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 28780b57cec5SDimitry Andric #endif 28790b57cec5SDimitry Andric __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st); 28800b57cec5SDimitry Andric __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true); 28810b57cec5SDimitry Andric } 28820b57cec5SDimitry Andric 28830b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid, 28840b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 *p_last, 28850b57cec5SDimitry Andric kmp_int64 lb, kmp_int64 ub, kmp_int64 st, 28860b57cec5SDimitry Andric kmp_int64 chunk) { 28870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28880b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 28890b57cec5SDimitry Andric 
OMPT_STORE_RETURN_ADDRESS(gtid); 28900b57cec5SDimitry Andric #endif 28910b57cec5SDimitry Andric __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st); 28920b57cec5SDimitry Andric __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true); 28930b57cec5SDimitry Andric } 28940b57cec5SDimitry Andric 28950b57cec5SDimitry Andric void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, 28960b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 *p_last, 28970b57cec5SDimitry Andric kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, 28980b57cec5SDimitry Andric kmp_int64 chunk) { 28990b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 29000b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29010b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 29020b57cec5SDimitry Andric #endif 29030b57cec5SDimitry Andric __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st); 29040b57cec5SDimitry Andric __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true); 29050b57cec5SDimitry Andric } 29060b57cec5SDimitry Andric 29070b57cec5SDimitry Andric /*! 29080b57cec5SDimitry Andric @param loc Source code location 29090b57cec5SDimitry Andric @param gtid Global thread id 29100b57cec5SDimitry Andric @param p_last Pointer to a flag set to one if this is the last chunk or zero 29110b57cec5SDimitry Andric otherwise 29120b57cec5SDimitry Andric @param p_lb Pointer to the lower bound for the next chunk of work 29130b57cec5SDimitry Andric @param p_ub Pointer to the upper bound for the next chunk of work 29140b57cec5SDimitry Andric @param p_st Pointer to the stride for the next chunk of work 29150b57cec5SDimitry Andric @return one if there is work to be done, zero otherwise 29160b57cec5SDimitry Andric 29170b57cec5SDimitry Andric Get the next dynamically allocated chunk of work for this thread. 29180b57cec5SDimitry Andric If there is no more work, then the lb,ub and stride need not be modified. 
29190b57cec5SDimitry Andric */ 29200b57cec5SDimitry Andric int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 29210b57cec5SDimitry Andric kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { 29220b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29230b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 29240b57cec5SDimitry Andric #endif 29250b57cec5SDimitry Andric return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st 29260b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29270b57cec5SDimitry Andric , 29280b57cec5SDimitry Andric OMPT_LOAD_RETURN_ADDRESS(gtid) 29290b57cec5SDimitry Andric #endif 29300b57cec5SDimitry Andric ); 29310b57cec5SDimitry Andric } 29320b57cec5SDimitry Andric 29330b57cec5SDimitry Andric /*! 29340b57cec5SDimitry Andric See @ref __kmpc_dispatch_next_4 29350b57cec5SDimitry Andric */ 29360b57cec5SDimitry Andric int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 29370b57cec5SDimitry Andric kmp_uint32 *p_lb, kmp_uint32 *p_ub, 29380b57cec5SDimitry Andric kmp_int32 *p_st) { 29390b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29400b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 29410b57cec5SDimitry Andric #endif 29420b57cec5SDimitry Andric return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st 29430b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29440b57cec5SDimitry Andric , 29450b57cec5SDimitry Andric OMPT_LOAD_RETURN_ADDRESS(gtid) 29460b57cec5SDimitry Andric #endif 29470b57cec5SDimitry Andric ); 29480b57cec5SDimitry Andric } 29490b57cec5SDimitry Andric 29500b57cec5SDimitry Andric /*! 
29510b57cec5SDimitry Andric See @ref __kmpc_dispatch_next_4 29520b57cec5SDimitry Andric */ 29530b57cec5SDimitry Andric int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 29540b57cec5SDimitry Andric kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { 29550b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29560b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 29570b57cec5SDimitry Andric #endif 29580b57cec5SDimitry Andric return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st 29590b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29600b57cec5SDimitry Andric , 29610b57cec5SDimitry Andric OMPT_LOAD_RETURN_ADDRESS(gtid) 29620b57cec5SDimitry Andric #endif 29630b57cec5SDimitry Andric ); 29640b57cec5SDimitry Andric } 29650b57cec5SDimitry Andric 29660b57cec5SDimitry Andric /*! 29670b57cec5SDimitry Andric See @ref __kmpc_dispatch_next_4 29680b57cec5SDimitry Andric */ 29690b57cec5SDimitry Andric int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 29700b57cec5SDimitry Andric kmp_uint64 *p_lb, kmp_uint64 *p_ub, 29710b57cec5SDimitry Andric kmp_int64 *p_st) { 29720b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29730b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 29740b57cec5SDimitry Andric #endif 29750b57cec5SDimitry Andric return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st 29760b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29770b57cec5SDimitry Andric , 29780b57cec5SDimitry Andric OMPT_LOAD_RETURN_ADDRESS(gtid) 29790b57cec5SDimitry Andric #endif 29800b57cec5SDimitry Andric ); 29810b57cec5SDimitry Andric } 29820b57cec5SDimitry Andric 29830b57cec5SDimitry Andric /*! 29840b57cec5SDimitry Andric @param loc Source code location 29850b57cec5SDimitry Andric @param gtid Global thread id 29860b57cec5SDimitry Andric 29870b57cec5SDimitry Andric Mark the end of a dynamic loop. 
29880b57cec5SDimitry Andric */ 29890b57cec5SDimitry Andric void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) { 29900b57cec5SDimitry Andric __kmp_dispatch_finish<kmp_uint32>(gtid, loc); 29910b57cec5SDimitry Andric } 29920b57cec5SDimitry Andric 29930b57cec5SDimitry Andric /*! 29940b57cec5SDimitry Andric See @ref __kmpc_dispatch_fini_4 29950b57cec5SDimitry Andric */ 29960b57cec5SDimitry Andric void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) { 29970b57cec5SDimitry Andric __kmp_dispatch_finish<kmp_uint64>(gtid, loc); 29980b57cec5SDimitry Andric } 29990b57cec5SDimitry Andric 30000b57cec5SDimitry Andric /*! 30010b57cec5SDimitry Andric See @ref __kmpc_dispatch_fini_4 30020b57cec5SDimitry Andric */ 30030b57cec5SDimitry Andric void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) { 30040b57cec5SDimitry Andric __kmp_dispatch_finish<kmp_uint32>(gtid, loc); 30050b57cec5SDimitry Andric } 30060b57cec5SDimitry Andric 30070b57cec5SDimitry Andric /*! 30080b57cec5SDimitry Andric See @ref __kmpc_dispatch_fini_4 30090b57cec5SDimitry Andric */ 30100b57cec5SDimitry Andric void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) { 30110b57cec5SDimitry Andric __kmp_dispatch_finish<kmp_uint64>(gtid, loc); 30120b57cec5SDimitry Andric } 3013*0fca6ea1SDimitry Andric 3014*0fca6ea1SDimitry Andric /*! 3015*0fca6ea1SDimitry Andric See @ref __kmpc_dispatch_deinit 3016*0fca6ea1SDimitry Andric */ 3017*0fca6ea1SDimitry Andric void __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 gtid) {} 30180b57cec5SDimitry Andric /*! 
@} */ 30190b57cec5SDimitry Andric 30200b57cec5SDimitry Andric //----------------------------------------------------------------------------- 30210b57cec5SDimitry Andric // Non-template routines from kmp_dispatch.cpp used in other sources 30220b57cec5SDimitry Andric 30230b57cec5SDimitry Andric kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) { 30240b57cec5SDimitry Andric return value == checker; 30250b57cec5SDimitry Andric } 30260b57cec5SDimitry Andric 30270b57cec5SDimitry Andric kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) { 30280b57cec5SDimitry Andric return value != checker; 30290b57cec5SDimitry Andric } 30300b57cec5SDimitry Andric 30310b57cec5SDimitry Andric kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) { 30320b57cec5SDimitry Andric return value < checker; 30330b57cec5SDimitry Andric } 30340b57cec5SDimitry Andric 30350b57cec5SDimitry Andric kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) { 30360b57cec5SDimitry Andric return value >= checker; 30370b57cec5SDimitry Andric } 30380b57cec5SDimitry Andric 30390b57cec5SDimitry Andric kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) { 30400b57cec5SDimitry Andric return value <= checker; 30410b57cec5SDimitry Andric } 30420b57cec5SDimitry Andric 30430b57cec5SDimitry Andric kmp_uint32 30440b57cec5SDimitry Andric __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker, 30450b57cec5SDimitry Andric kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), 30460b57cec5SDimitry Andric void *obj // Higher-level synchronization object, or NULL. 
30470b57cec5SDimitry Andric ) { 30480b57cec5SDimitry Andric // note: we may not belong to a team at this point 30490b57cec5SDimitry Andric volatile kmp_uint32 *spin = spinner; 30500b57cec5SDimitry Andric kmp_uint32 check = checker; 30510b57cec5SDimitry Andric kmp_uint32 spins; 30520b57cec5SDimitry Andric kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred; 30530b57cec5SDimitry Andric kmp_uint32 r; 305404eeddc0SDimitry Andric kmp_uint64 time; 30550b57cec5SDimitry Andric 30560b57cec5SDimitry Andric KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin)); 30570b57cec5SDimitry Andric KMP_INIT_YIELD(spins); 305804eeddc0SDimitry Andric KMP_INIT_BACKOFF(time); 30590b57cec5SDimitry Andric // main wait spin loop 30600b57cec5SDimitry Andric while (!f(r = TCR_4(*spin), check)) { 30610b57cec5SDimitry Andric KMP_FSYNC_SPIN_PREPARE(obj); 30620b57cec5SDimitry Andric /* GEH - remove this since it was accidentally introduced when kmp_wait was 30630b57cec5SDimitry Andric split. It causes problems with infinite recursion because of exit lock */ 30640b57cec5SDimitry Andric /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) 30650b57cec5SDimitry Andric __kmp_abort_thread(); */ 306604eeddc0SDimitry Andric KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); 30670b57cec5SDimitry Andric } 30680b57cec5SDimitry Andric KMP_FSYNC_SPIN_ACQUIRED(obj); 30690b57cec5SDimitry Andric return r; 30700b57cec5SDimitry Andric } 30710b57cec5SDimitry Andric 30720b57cec5SDimitry Andric void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker, 30730b57cec5SDimitry Andric kmp_uint32 (*pred)(void *, kmp_uint32), 30740b57cec5SDimitry Andric void *obj // Higher-level synchronization object, or NULL. 
30750b57cec5SDimitry Andric ) { 30760b57cec5SDimitry Andric // note: we may not belong to a team at this point 30770b57cec5SDimitry Andric void *spin = spinner; 30780b57cec5SDimitry Andric kmp_uint32 check = checker; 30790b57cec5SDimitry Andric kmp_uint32 spins; 30800b57cec5SDimitry Andric kmp_uint32 (*f)(void *, kmp_uint32) = pred; 308104eeddc0SDimitry Andric kmp_uint64 time; 30820b57cec5SDimitry Andric 30830b57cec5SDimitry Andric KMP_FSYNC_SPIN_INIT(obj, spin); 30840b57cec5SDimitry Andric KMP_INIT_YIELD(spins); 308504eeddc0SDimitry Andric KMP_INIT_BACKOFF(time); 30860b57cec5SDimitry Andric // main wait spin loop 30870b57cec5SDimitry Andric while (!f(spin, check)) { 30880b57cec5SDimitry Andric KMP_FSYNC_SPIN_PREPARE(obj); 30890b57cec5SDimitry Andric /* if we have waited a bit, or are noversubscribed, yield */ 30900b57cec5SDimitry Andric /* pause is in the following code */ 309104eeddc0SDimitry Andric KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); 30920b57cec5SDimitry Andric } 30930b57cec5SDimitry Andric KMP_FSYNC_SPIN_ACQUIRED(obj); 30940b57cec5SDimitry Andric } 30950b57cec5SDimitry Andric 30960b57cec5SDimitry Andric } // extern "C" 30970b57cec5SDimitry Andric 30980b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT 30990b57cec5SDimitry Andric 31000b57cec5SDimitry Andric void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, 31010b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 lb, 31020b57cec5SDimitry Andric kmp_int32 ub, kmp_int32 st, kmp_int32 chunk, 31030b57cec5SDimitry Andric int push_ws) { 31040b57cec5SDimitry Andric __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, 31050b57cec5SDimitry Andric push_ws); 31060b57cec5SDimitry Andric } 31070b57cec5SDimitry Andric 31080b57cec5SDimitry Andric void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, 31090b57cec5SDimitry Andric enum sched_type schedule, kmp_uint32 lb, 31100b57cec5SDimitry Andric kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk, 31110b57cec5SDimitry Andric int push_ws) { 
31120b57cec5SDimitry Andric __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, 31130b57cec5SDimitry Andric push_ws); 31140b57cec5SDimitry Andric } 31150b57cec5SDimitry Andric 31160b57cec5SDimitry Andric void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, 31170b57cec5SDimitry Andric enum sched_type schedule, kmp_int64 lb, 31180b57cec5SDimitry Andric kmp_int64 ub, kmp_int64 st, kmp_int64 chunk, 31190b57cec5SDimitry Andric int push_ws) { 31200b57cec5SDimitry Andric __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, 31210b57cec5SDimitry Andric push_ws); 31220b57cec5SDimitry Andric } 31230b57cec5SDimitry Andric 31240b57cec5SDimitry Andric void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, 31250b57cec5SDimitry Andric enum sched_type schedule, kmp_uint64 lb, 31260b57cec5SDimitry Andric kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk, 31270b57cec5SDimitry Andric int push_ws) { 31280b57cec5SDimitry Andric __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, 31290b57cec5SDimitry Andric push_ws); 31300b57cec5SDimitry Andric } 31310b57cec5SDimitry Andric 31320b57cec5SDimitry Andric void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) { 31330b57cec5SDimitry Andric __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc); 31340b57cec5SDimitry Andric } 31350b57cec5SDimitry Andric 31360b57cec5SDimitry Andric void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) { 31370b57cec5SDimitry Andric __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc); 31380b57cec5SDimitry Andric } 31390b57cec5SDimitry Andric 31400b57cec5SDimitry Andric void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) { 31410b57cec5SDimitry Andric __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc); 31420b57cec5SDimitry Andric } 31430b57cec5SDimitry Andric 31440b57cec5SDimitry Andric void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) { 31450b57cec5SDimitry Andric 
__kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc); 31460b57cec5SDimitry Andric } 31470b57cec5SDimitry Andric 31480b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */ 31490b57cec5SDimitry Andric 31500b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3151