2 * kmp_runtime.cpp -- KPTS runtime support library
5 //===----------------------------------------------------------------------===//
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
33 #include "ompt-specific.h"
36 #include "ompd-specific.h"
76 /* ------------------------------------------------------------------------ */
121 int new_size = level + thr->th.th_set_nested_nth_sz;
122 new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
124 new_nested_nth->nth[i] = 0;
126 new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
127 new_nested_nth->size = new_nested_nth->used = new_size;
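/* Illustrative sketch only (hypothetical helper and struct names, not part of
   kmp_runtime.cpp): the merge above leaves the first `level` slots at 0,
   meaning "no override recorded for enclosing levels", and appends the
   thread's pending nested num_threads list (th_set_nested_nth above) starting
   at the current nesting level. */
#include <cstdlib>

struct nested_nth_sketch {
  int *nth;
  int size;
  int used;
};

static nested_nth_sketch build_override_sketch(const int *set_nth, int set_sz,
                                               int level) {
  nested_nth_sketch out;
  out.size = out.used = level + set_sz;
  out.nth = (int *)std::calloc(out.size, sizeof(int)); // outer levels stay 0
  for (int i = level, j = 0; i < out.size; ++i, ++j)
    out.nth[i] = set_nth[j]; // copy the per-thread list behind them
  return out;
}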
147 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
149 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
173 non-NULL.
179 TODO: Fix it. --ln */
187 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
188 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
190 /* stack grows down -- search through all of the active threads */
193 size_t stack_diff = stack_base - stack_addr;
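/* Minimal standalone sketch (hypothetical names, not the runtime's real data
   structures) of the ownership test used in the search above: because the
   stack grows down, an address belongs to a thread exactly when it lies no
   more than ds_stacksize bytes below ds_stackbase. */
#include <cstddef>
#include <cstdint>

struct stack_extent_sketch {
  char *stack_base;      // highest address of the stack region
  std::size_t stack_size;
};

static bool addr_on_stack_sketch(const stack_extent_sketch &s,
                                 const void *addr) {
  std::uintptr_t a = reinterpret_cast<std::uintptr_t>(addr);
  std::uintptr_t hi = reinterpret_cast<std::uintptr_t>(s.stack_base);
  return a <= hi && (hi - a) <= s.stack_size; // mirrors the stack_diff check
}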
201 // thread-specific data will be reset to NULL.
229 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
233 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
235 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
236 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
237 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
240 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
241 stack_base - stack_addr);
246 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
247 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
249 other_threads[i]->th.th_info.ds.ds_stacksize,
306 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
307 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
313 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
315 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
318 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
320 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
331 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
332 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
340 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
342 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
349 -1, other_stack_beg, other_stack_end,
350 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
362 /* ------------------------------------------------------------------------ */
387 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
399 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
400 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
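/* Standalone illustration (hypothetical helper, assuming page_size is a power
   of two) of the masking above: clearing the low-order bits rounds an address
   down to the start of its page, e.g. with a 4096-byte page, 0x7010 maps to
   0x7000. */
#include <cassert>
#include <cstdint>

static void *page_align_down_sketch(void *p, std::uintptr_t page_size) {
  assert((page_size & (page_size - 1)) == 0 && "power-of-two page size");
  return (void *)((std::uintptr_t)p & ~(page_size - 1));
}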
416 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
420 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
421 (char *)p1 + (page_size - 1),
424 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
425 (char *)p2 + (page_size - 1),
471 /* On Windows* OS by default abort() causes pop-up error box, which stalls
472 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
479 avoid pop-up error box. */
505 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
508 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
512 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
515 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
516 &thr->th.th_bar[bs_plain_barrier + 1],
520 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
521 &thr->th.th_bar[bs_forkjoin_barrier + 1],
526 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
527 &thr->th.th_bar[bs_reduction_barrier + 1],
538 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
539 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
542 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
543 &team->t.t_bar[bs_last_barrier],
547 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
548 &team->t.t_bar[bs_plain_barrier + 1],
552 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
553 &team->t.t_bar[bs_forkjoin_barrier + 1],
558 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
559 &team->t.t_bar[bs_reduction_barrier + 1],
565 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
569 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
572 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
573 &team->t.t_disp_buffer[num_disp_buff],
584 /* ------------------------------------------------------------------------ */
592 /* ------------------------------------------------------------------------ */
646 /* __kmp_parallel_deo -- Wait until it's our turn. */
654 if (__kmp_threads[gtid]->th.th_root->r.r_active)
662 if (!team->t.t_serialized) {
664 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
671 /* __kmp_parallel_dxo -- Signal the next task. */
680 if (__kmp_threads[gtid]->th.th_root->r.r_active)
684 if (!team->t.t_serialized) {
689 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
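/* Minimal self-contained sketch (std::atomic stand-in, hypothetical names) of
   the turn-taking pattern used by __kmp_parallel_deo/__kmp_parallel_dxo above:
   each thread waits until the shared counter equals its id, performs its
   ordered work, then passes the turn to (tid + 1) % nproc. */
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int nproc = 4;
  std::atomic<int> turn{0};
  std::vector<std::thread> pool;
  for (int tid = 0; tid < nproc; ++tid)
    pool.emplace_back([&, tid] {
      while (turn.load(std::memory_order_acquire) != tid) // wait for our turn
        std::this_thread::yield();
      std::printf("ordered body of thread %d\n", tid);
      turn.store((tid + 1) % nproc, std::memory_order_release); // signal next
    });
  for (auto &t : pool)
    t.join();
}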
696 /* ------------------------------------------------------------------------ */
709 team = th->th.th_team;
712 th->th.th_ident = id_ref;
714 if (team->t.t_serialized) {
717 kmp_int32 old_this = th->th.th_local.this_construct;
719 ++th->th.th_local.this_construct;
720 /* try to set team count to thread count--success means thread got the single block */
723 if (team->t.t_construct == old_this) {
724 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
725 th->th.th_local.this_construct);
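/* Sketch (std::atomic stand-in for the team counter, hypothetical names) of
   the winner-takes-the-single pattern above: every thread advances its private
   construct count, and the one thread whose compare-exchange on the shared
   team counter succeeds executes the single block. Assuming all team threads
   call this once per encountered single region, exactly one call returns true
   each time. */
#include <atomic>

static bool enter_single_sketch(std::atomic<int> &team_construct,
                                int &my_construct) {
  int old_this = my_construct++; // per-thread count of singles seen so far
  int expected = old_this;
  // Only one thread moves the team counter from old_this to old_this + 1.
  return team_construct.compare_exchange_strong(expected, my_construct,
                                                std::memory_order_acq_rel);
}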
729 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
730 team->t.t_active_level == 1) {
773 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
775 // If dyn-var is set, dynamically adjust the number of desired threads,
798 new_nthreads = __kmp_avail_proc - __kmp_nth +
799 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
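/* Worked example (illustrative numbers only) of the arithmetic above: with
   __kmp_avail_proc = 8, __kmp_nth = 6 threads already registered, and an
   inactive root whose hot team holds 4 of them, the cap is 8 - 6 + 4 = 6,
   i.e. the region may reuse the 4 hot-team threads plus the 2 unused procs. */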
815 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
834 if (__kmp_nth + new_nthreads -
835 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
837 int tl_nthreads = __kmp_max_nth - __kmp_nth +
838 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
843 // If dyn-var is false, emit a 1-time warning.
863 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
864 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
865 if (cg_nthreads + new_nthreads -
866 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
868 int tl_nthreads = max_cg_threads - cg_nthreads +
869 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
874 // If dyn-var is false, emit a 1-time warning.
898 --capacity;
904 capacity -= __kmp_hidden_helper_threads_num;
906 if (__kmp_nth + new_nthreads -
907 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
910 int slotsRequired = __kmp_nth + new_nthreads -
911 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
916 new_nthreads -= (slotsRequired - slotsAdded);
919 // If dyn-var is false, emit a 1-time warning.
949 if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
950 __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
951 this_thr->th.th_nt_msg);
965 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
970 master_th->th.th_info.ds.ds_tid = 0;
971 master_th->th.th_team = team;
972 master_th->th.th_team_nproc = team->t.t_nproc;
973 master_th->th.th_team_master = master_th;
974 master_th->th.th_team_serialized = FALSE;
975 master_th->th.th_dispatch = &team->t.t_dispatch[0];
980 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
983 int level = team->t.t_active_level - 1; // index in array of hot teams
984 if (master_th->th.th_teams_microtask) { // are we inside the teams?
985 if (master_th->th.th_teams_size.nteams > 1) {
989 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
990 master_th->th.th_teams_level == team->t.t_level) {
993 } // team->t.t_level will be increased inside parallel
1003 hot_teams[level].hot_team_nth = team->t.t_nproc;
1010 use_hot_team = team == root->r.r_hot_team;
1015 team->t.t_threads[0] = master_th;
1019 for (i = 1; i < team->t.t_nproc; i++) {
1023 team->t.t_threads[i] = thr;
1025 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1029 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1030 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1031 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1032 team->t.t_bar[bs_plain_barrier].b_arrived));
1033 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1034 thr->th.th_teams_level = master_th->th.th_teams_level;
1035 thr->th.th_teams_size = master_th->th.th_teams_size;
1038 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1040 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1043 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1058 if (team->t.t_nproc > 1 &&
1060 team->t.b->update_num_threads(team->t.t_nproc);
1061 __kmp_add_threads_to_team(team, team->t.t_nproc);
1068 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
1073 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
1074 team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
1078 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1079 master_th->th.th_task_state);
1083 if (team->t.t_nproc > 1) {
1084 KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
1085 team->t.t_threads[1]->th.th_task_state == 1);
1086 KMP_CHECK_UPDATE(master_th->th.th_task_state,
1087 team->t.t_threads[1]->th.th_task_state);
1089 master_th->th.th_task_state = 0;
1093 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1094 master_th->th.th_task_state);
1096 master_th->th.th_task_state = 0;
1100 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1101 for (i = 0; i < team->t.t_nproc; i++) {
1102 kmp_info_t *thr = team->t.t_threads[i];
1103 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1104 thr->th.th_prev_level != team->t.t_level) {
1105 team->t.t_display_affinity = 1;
1135 // writes that would put the cache-line into a written state, causing all
1137 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1138 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1141 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1143 // Similarly here. Don't write to this cache-line in the team structure
1145 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
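/* Sketch of the check-before-write idiom that KMP_CHECK_UPDATE expresses in
   the lines above (hypothetical macro name): only store when the value
   actually changes, so an unchanged team field does not dirty its cache line
   and force invalidations in the other threads' caches. */
#define CHECK_UPDATE_SKETCH(dst, val)                                          \
  do {                                                                         \
    if ((dst) != (val))                                                        \
      (dst) = (val);                                                           \
  } while (0)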
1152 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1161 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1163 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1166 if (team->t.t_mxcsr != mxcsr) {
1167 __kmp_load_mxcsr(&team->t.t_mxcsr);
1189 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1197 serial_team = this_thr->th.th_serial_team;
1203 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1204 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1208 // of proc-bind-var for this parallel region.
1209 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1212 this_thr->th.th_set_proc_bind = proc_bind_default;
1215 this_thr->th.th_set_nproc = 0;
1221 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1226 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1231 &(parent_task_info->task_data), &(parent_task_info->frame),
1238 if (this_thr->th.th_team != serial_team) {
1240 int level = this_thr->th.th_team->t.t_level;
1242 if (serial_team->t.t_serialized) {
1250 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1254 proc_bind, &this_thr->th.th_current_task->td_icvs,
1260 new_team->t.t_threads[0] = this_thr;
1261 new_team->t.t_parent = this_thr->th.th_team;
1263 this_thr->th.th_serial_team = serial_team;
1281 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1282 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1283 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1284 serial_team->t.t_ident = loc;
1285 serial_team->t.t_serialized = 1;
1286 serial_team->t.t_nproc = 1;
1287 serial_team->t.t_parent = this_thr->th.th_team;
1288 if (this_thr->th.th_team->t.t_nested_nth)
1289 serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
1291 serial_team->t.t_nested_nth = &__kmp_nested_nth;
1293 serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
1294 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1295 this_thr->th.th_team = serial_team;
1296 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1299 this_thr->th.th_current_task));
1300 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1301 this_thr->th.th_current_task->td_flags.executing = 0;
1307 team->t.t_serialized? */
1308 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1309 &this_thr->th.th_current_task->td_parent->td_icvs);
1314 if (this_thr->th.th_team->t.t_nested_nth)
1315 nested_nth = this_thr->th.th_team->t.t_nested_nth;
1316 if (nested_nth->used && (level + 1 < nested_nth->used)) {
1317 this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
1322 this_thr->th.th_current_task->td_icvs.proc_bind =
1327 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1329 this_thr->th.th_info.ds.ds_tid = 0;
1332 this_thr->th.th_team_nproc = 1;
1333 this_thr->th.th_team_master = this_thr;
1334 this_thr->th.th_team_serialized = 1;
1335 this_thr->th.th_task_team = NULL;
1336 this_thr->th.th_task_state = 0;
1338 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1339 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1340 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1345 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1346 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1347 serial_team->t.t_dispatch->th_disp_buffer =
1351 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1358 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1359 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1360 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1361 ++serial_team->t.t_serialized;
1362 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1365 int level = this_thr->th.th_team->t.t_level;
1370 if (serial_team->t.t_nested_nth)
1371 nested_nth = serial_team->t.t_nested_nth;
1372 if (nested_nth->used && (level + 1 < nested_nth->used)) {
1373 this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
1376 serial_team->t.t_level++;
1379 global_tid, serial_team, serial_team->t.t_level));
1382 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1387 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1388 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1390 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1397 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1402 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1403 this_thr->th.th_prev_num_threads != 1) {
1404 // NULL means use the affinity-format-var ICV
1406 this_thr->th.th_prev_level = serial_team->t.t_level;
1407 this_thr->th.th_prev_num_threads = 1;
1414 serial_team->t.ompt_team_info.master_return_address = codeptr;
1416 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1417 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1433 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1438 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1439 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1449 return (master_th->th.th_teams_microtask && ap &&
1476 parent_team->t.t_ident = loc;
1478 parent_team->t.t_argc = argc;
1479 argv = (void **)parent_team->t.t_argv;
1480 for (i = argc - 1; i >= 0; --i) {
1484 if (parent_team == master_th->th.th_serial_team) {
1487 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1492 parent_team->t.t_serialized--;
1497 parent_team->t.t_pkfn = microtask;
1517 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1520 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1524 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1532 parent_team->t.t_serialized--;
1537 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1548 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1552 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1561 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1567 parent_team->t.t_pkfn = microtask;
1568 parent_team->t.t_invoke = invoker;
1569 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1570 parent_team->t.t_active_level++;
1571 parent_team->t.t_level++;
1572 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1579 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1592 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1594 kmp_info_t **other_threads = parent_team->t.t_threads;
1597 int old_proc = master_th->th.th_teams_size.nth;
1602 parent_team->t.t_nproc = master_set_numthreads;
1604 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1608 master_th->th.th_set_nproc = 0;
1621 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1624 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1627 // No proc_bind clause specified; use current proc-bind-var
1629 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1632 This overrides proc-bind-var for this parallel region, but does not
1633 change proc-bind-var. */
1634 // Figure the value of proc-bind-var for the child threads.
1637 master_th->th.th_current_task->td_icvs.proc_bind)) {
1641 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1642 // Need to change the bind-var ICV to correct value for each implicit task
1644 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1645 kmp_info_t **other_threads = parent_team->t.t_threads;
1646 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1647 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1651 master_th->th.th_set_proc_bind = proc_bind_default;
1657 parent_team->t.t_active_level == 1 // only report frames at level 1
1658 && master_th->th.th_teams_size.nteams == 1) {
1660 master_th->th.th_frame_time = tmp_time;
1661 parent_team->t.t_region_time = tmp_time;
1664 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1666 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1686 parent_team->t.t_id, parent_team->t.t_pkfn));
1688 if (!parent_team->t.t_invoke(gtid)) {
1692 parent_team->t.t_id, parent_team->t.t_pkfn));
1729 master_th->th.th_serial_team->t.t_pkfn = microtask;
1734 master_th->th.th_serial_team->t.t_ident = loc;
1737 master_th->th.th_serial_team->t.t_level--;
1753 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1755 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1758 &(task_info->task_data), 1,
1759 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1763 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1772 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1785 ompt_scope_end, NULL, &(task_info->task_data), 1,
1786 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1795 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1799 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1800 team = master_th->th.th_team;
1801 // team->t.t_pkfn = microtask;
1802 team->t.t_invoke = invoker;
1804 team->t.t_argc = argc;
1805 argv = (void **)team->t.t_argv;
1806 for (i = argc - 1; i >= 0; --i)
1810 team->t.t_level--;
1818 ompt_scope_end, NULL, &(task_info->task_data), 0,
1819 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1827 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1832 for (i = argc - 1; i >= 0; --i)
1849 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1858 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1862 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1884 ompt_scope_end, NULL, &(task_info->task_data), 1,
1885 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1895 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1975 parent_team = master_th->th.th_team;
1976 master_tid = master_th->th.th_info.ds.ds_tid;
1977 master_this_cons = master_th->th.th_local.this_construct;
1978 root = master_th->th.th_root;
1979 master_active = root->r.r_active;
1980 master_set_numthreads = master_th->th.th_set_nproc;
1982 master_th->th.th_current_task->td_icvs.task_thread_limit;
2001 level = parent_team->t.t_level;
2002 // used to launch non-serial teams even if nested is not allowed
2003 active_level = parent_team->t.t_active_level;
2005 teams_level = master_th->th.th_teams_level;
2007 p_hot_teams = &master_th->th.th_hot_teams;
2011 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
2031 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2035 master_th->th.th_ident = loc;
2058 (parent_team->t.t_active_level >=
2059 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2085 // Free lock for single thread execution here; for multi-thread
2095 master_th->th.th_set_nproc = 0;
2111 parent_team->t.t_active_level, master_th,
2112 master_th->th.th_current_task,
2113 master_th->th.th_current_task->td_icvs.max_active_levels));
2114 // TODO: GEH - cannot do this assertion because root thread not set up as
2116 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2117 master_th->th.th_current_task->td_flags.executing = 0;
2119 if (!master_th->th.th_teams_microtask || level > teams_level) {
2121 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2125 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2127 if (!master_th->th.th_set_nested_nth &&
2128 (level + 1 < parent_team->t.t_nested_nth->used) &&
2129 (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
2130 nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
2131 } else if (master_th->th.th_set_nested_nth) {
2133 if ((level + 1 < nested_nth->used) &&
2134 (nested_nth->nth[level + 1] != nthreads_icv))
2135 nthreads_icv = nested_nth->nth[level + 1];
2143 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2146 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2149 // No proc_bind clause specified; use current proc-bind-var for this
2152 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2155 if (master_th->th.th_teams_microtask &&
2160 This overrides proc-bind-var for this parallel region, but does not
2161 change proc-bind-var. */
2162 // Figure the value of proc-bind-var for the child threads.
2165 master_th->th.th_current_task->td_icvs.proc_bind)) {
2168 if (!master_th->th.th_teams_microtask ||
2175 master_th->th.th_set_proc_bind = proc_bind_default;
2179 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2197 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2206 &master_th->th.th_current_task->td_icvs,
2209 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2210 &master_th->th.th_current_task->td_icvs);
2213 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2216 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2217 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2218 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2219 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2220 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2222 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2225 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2226 // TODO: parent_team->t.t_level == INT_MAX ???
2227 if (!master_th->th.th_teams_microtask || level > teams_level) {
2228 int new_level = parent_team->t.t_level + 1;
2229 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2230 new_level = parent_team->t.t_active_level + 1;
2231 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2234 int new_level = parent_team->t.t_level;
2235 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2236 new_level = parent_team->t.t_active_level;
2237 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2240 // set primary thread's schedule as new run-time schedule
2241 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2243 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2244 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2247 if (team->t.t_nested_nth &&
2248 team->t.t_nested_nth != parent_team->t.t_nested_nth) {
2249 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
2250 KMP_INTERNAL_FREE(team->t.t_nested_nth);
2251 team->t.t_nested_nth = NULL;
2253 team->t.t_nested_nth = parent_team->t.t_nested_nth;
2254 if (master_th->th.th_set_nested_nth) {
2257 team->t.t_nested_nth = nested_nth;
2258 KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
2259 master_th->th.th_set_nested_nth = NULL;
2260 master_th->th.th_set_nested_nth_sz = 0;
2261 master_th->th.th_nt_strict = false;
2273 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2274 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2275 team->t.t_nproc));
2276 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2277 (team->t.t_master_tid == 0 &&
2278 (team->t.t_parent == root->r.r_root_team ||
2279 team->t.t_parent->t.t_serialized)));
2283 argv = (void **)team->t.t_argv;
2285 for (i = argc - 1; i >= 0; --i) {
2293 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2298 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2299 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2300 root->r.r_active = TRUE;
2304 &master_th->th.th_current_task->td_icvs, loc);
2307 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2313 if (team->t.t_active_level == 1 // only report frames at level 1
2314 && !master_th->th.th_teams_microtask) { // not in teams construct
2322 // Internal fork - report frame begin
2323 master_th->th.th_frame_time = tmp_time;
2325 team->t.t_region_time = tmp_time;
2332 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2338 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2348 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2349 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2350 } else if (parent_team->t.t_serialized) {
2355 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2356 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2377 team->t.t_id, team->t.t_pkfn));
2388 if (!team->t.t_invoke(gtid)) {
2400 team->t.t_id, team->t.t_pkfn));
2406 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2417 thread->th.ompt_thread_info.state =
2418 ((team->t.t_serialized) ? ompt_state_work_serial
2428 parallel_data, &(task_info->task_data), flags, codeptr);
2431 task_info->frame.enter_frame = ompt_data_none;
2454 root = master_th->th.th_root;
2455 team = master_th->th.th_team;
2456 parent_team = team->t.t_parent;
2458 master_th->th.th_ident = loc;
2461 void *team_microtask = (void *)team->t.t_pkfn;
2463 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2464 // and end-parallel events.
2466 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2467 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2476 team->t.t_task_team[master_th->th.th_task_state],
2477 master_th->th.th_task_team));
2482 if (team->t.t_serialized) {
2483 if (master_th->th.th_teams_microtask) {
2485 int level = team->t.t_level;
2486 int tlevel = master_th->th.th_teams_level;
2489 // so do it here - at the end of teams construct
2490 team->t.t_level++;
2495 team->t.t_serialized++;
2512 master_active = team->t.t_master_active;
2520 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2522 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2523 team->t.t_stack_id = NULL;
2527 master_th->th.th_task_state =
2530 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2531 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2535 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2536 parent_team->t.t_stack_id = NULL;
2544 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2545 void *codeptr = team->t.ompt_team_info.master_return_address;
2550 if (team->t.t_active_level == 1 &&
2551 (!master_th->th.th_teams_microtask || /* not in teams construct */
2552 master_th->th.th_teams_size.nteams == 1)) {
2553 master_th->th.th_ident = loc;
2558 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2559 master_th->th.th_frame_time, 0, loc,
2560 master_th->th.th_team_nproc, 1);
2570 master_th->th.th_first_place = team->t.t_first_place;
2571 master_th->th.th_last_place = team->t.t_last_place;
2575 if (master_th->th.th_teams_microtask && !exit_teams &&
2576 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2577 team->t.t_level == master_th->th.th_teams_level + 1) {
2586 int ompt_team_size = team->t.t_nproc;
2588 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2589 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2591 task_info->frame.exit_frame = ompt_data_none;
2592 task_info->task_data = ompt_data_none;
2598 team->t.t_level--;
2599 team->t.t_active_level--;
2600 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2606 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2607 int old_num = master_th->th.th_team_nproc;
2608 int new_num = master_th->th.th_teams_size.nth;
2609 kmp_info_t **other_threads = team->t.t_threads;
2610 team->t.t_nproc = new_num;
2612 other_threads[i]->th.th_team_nproc = new_num;
2614 // Adjust states of non-used threads of the team
2616 // Re-initialize thread's barrier data.
2618 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2620 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2623 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2628 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2644 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2645 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2647 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2654 if (!master_th->th.th_teams_microtask ||
2655 team->t.t_level > master_th->th.th_teams_level) {
2657 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2659 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2668 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2670 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2671 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2673 task_info->frame.exit_frame = ompt_data_none;
2674 task_info->task_data = ompt_data_none;
2682 master_th->th.th_def_allocator = team->t.t_def_allocator;
2690 if (root->r.r_active != master_active)
2691 root->r.r_active = master_active;
2701 master_th->th.th_team = parent_team;
2702 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2703 master_th->th.th_team_master = parent_team->t.t_threads[0];
2704 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2707 if (parent_team->t.t_serialized &&
2708 parent_team != master_th->th.th_serial_team &&
2709 parent_team != root->r.r_root_team) {
2711 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2712 master_th->th.th_serial_team = parent_team;
2717 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2718 team->t.t_primary_task_state == 1);
2719 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2722 master_th->th.th_task_team =
2723 parent_team->t.t_task_team[master_th->th.th_task_state];
2726 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2730 // TODO: GEH - cannot do this assertion because root thread not set up as
2732 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2733 master_th->th.th_current_task->td_flags.executing = 1;
2738 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2761 if (thread->th.th_team != thread->th.th_serial_team) {
2764 if (thread->th.th_team->t.t_serialized > 1) {
2767 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2770 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2771 thread->th.th_team->t.t_serialized) {
2780 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2782 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2784 control->next = thread->th.th_team->t.t_control_stack_top;
2785 thread->th.th_team->t.t_control_stack_top = control;
2805 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2815 root = thread->th.th_root;
2816 if (__kmp_init_parallel && (!root->r.r_active) &&
2817 (root->r.r_hot_team->t.t_nproc > new_nth)
2822 kmp_team_t *hot_team = root->r.r_hot_team;
2828 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2831 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2832 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2836 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2838 __kmp_free_thread(hot_team->t.t_threads[f]);
2839 hot_team->t.t_threads[f] = NULL;
2841 hot_team->t.t_nproc = new_nth;
2843 if (thread->th.th_hot_teams) {
2844 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2845 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2850 hot_team->t.b->update_num_threads(new_nth);
2858 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2859 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2862 hot_team->t.t_size_changed = -1;
2919 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2922 gtid, thread->th.th_current_task,
2923 thread->th.th_current_task->td_icvs.max_active_levels));
2924 return thread->th.th_current_task->td_icvs.max_active_levels;
2927 // nteams-var per-device ICV
2933 // teams-thread-limit-var per-device ICV
2943 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2954 // Valid parameters should fit in one of two intervals - standard or extended:
2956 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2978 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2980 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2981 __kmp_sch_map[kind - kmp_sched_lower - 1];
2984 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2985 // kmp_sched_lower - 2 ];
2986 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2987 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2988 kmp_sched_lower - 2];
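/* Worked index example using the interval bounds listed above (lower = 0,
   upper_std = 5, lower_ext = 100): a standard kind of 3 selects
   __kmp_sch_map[3 - 0 - 1] = __kmp_sch_map[2], while an extended kind of 101
   selects __kmp_sch_map[101 - 100 + 5 - 0 - 2] = __kmp_sch_map[4], so the two
   ranges index one contiguous map with no gap between them. */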
2991 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2994 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2996 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
3010 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
3046 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3062 return -1;
3064 team = thr->th.th_team;
3065 ii = team->t.t_level;
3067 return -1;
3069 if (thr->th.th_teams_microtask) {
3071 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3088 dd = team->t.t_serialized;
3091 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3093 if ((team->t.t_serialized) && (!dd)) {
3094 team = team->t.t_parent;
3098 team = team->t.t_parent;
3099 dd = team->t.t_serialized;
3100 ii--;
3104 return (dd > 1) ? (0) : (team->t.t_master_tid);
3120 return -1;
3122 team = thr->th.th_team;
3123 ii = team->t.t_level;
3125 return -1;
3127 if (thr->th.th_teams_microtask) {
3129 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3144 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3146 if (team->t.t_serialized && (!dd)) {
3147 team = team->t.t_parent;
3151 team = team->t.t_parent;
3152 ii--;
3156 return team->t.t_nproc;
3168 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3198 if (!realloc || argc > team->t.t_max_argc) {
3202 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3204 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3205 __kmp_free((void *)team->t.t_argv);
3209 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3212 team->t.t_id, team->t.t_max_argc));
3213 team->t.t_argv = &team->t.t_inline_argv[0];
3216 -1, &team->t.t_inline_argv[0],
3217 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3219 team->t.t_id);
3223 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3228 team->t.t_id, team->t.t_max_argc));
3229 team->t.t_argv =
3230 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3232 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3233 &team->t.t_argv[team->t.t_max_argc],
3234 sizeof(void *) * team->t.t_max_argc,
3235 "team_%d.t_argv", team->t.t_id);
3244 team->t.t_threads =
3246 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3248 team->t.t_dispatch =
3250 team->t.t_implicit_task_taskdata =
3252 team->t.t_max_nproc = max_nth;
3256 team->t.t_disp_buffer[i].buffer_index = i;
3257 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3264 for (i = 0; i < team->t.t_max_nproc; ++i) {
3265 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3266 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3267 team->t.t_dispatch[i].th_disp_buffer = NULL;
3273 __kmp_free(team->t.t_threads);
3274 __kmp_free(team->t.t_disp_buffer);
3275 __kmp_free(team->t.t_dispatch);
3276 __kmp_free(team->t.t_implicit_task_taskdata);
3277 team->t.t_threads = NULL;
3278 team->t.t_disp_buffer = NULL;
3279 team->t.t_dispatch = NULL;
3280 team->t.t_implicit_task_taskdata = 0;
3284 kmp_info_t **oldThreads = team->t.t_threads;
3286 __kmp_free(team->t.t_disp_buffer);
3287 __kmp_free(team->t.t_dispatch);
3288 __kmp_free(team->t.t_implicit_task_taskdata);
3291 KMP_MEMCPY(team->t.t_threads, oldThreads,
3292 team->t.t_nproc * sizeof(kmp_info_t *));
3337 0; // probably =team->t.t_serial like in save_inter_controls
3338 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3353 KMP_ASSERT(!root->r.r_begin);
3356 __kmp_init_lock(&root->r.r_begin_lock);
3357 root->r.r_begin = FALSE;
3358 root->r.r_active = FALSE;
3359 root->r.r_in_parallel = 0;
3360 root->r.r_blocktime = __kmp_dflt_blocktime;
3362 root->r.r_affinity_assigned = FALSE;
3381 // Non-NULL value should be assigned to make the debugger display the root
3383 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3388 root->r.r_root_team = root_team;
3389 root_team->t.t_control_stack_top = NULL;
3392 root_team->t.t_threads[0] = NULL;
3393 root_team->t.t_nproc = 1;
3394 root_team->t.t_serialized = 1;
3395 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3396 root_team->t.t_sched.sched = r_sched.sched;
3397 root_team->t.t_nested_nth = &__kmp_nested_nth;
3401 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3420 root->r.r_hot_team = hot_team;
3421 root_team->t.t_control_stack_top = NULL;
3423 /* first-time initialization */
3424 hot_team->t.t_parent = root_team;
3427 hot_team_max_nth = hot_team->t.t_max_nproc;
3429 hot_team->t.t_threads[f] = NULL;
3431 hot_team->t.t_nproc = 1;
3432 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3433 hot_team->t.t_sched.sched = r_sched.sched;
3434 hot_team->t.t_size_changed = 0;
3435 hot_team->t.t_nested_nth = &__kmp_nested_nth;
3463 __kmp_print_structure_team_accum(list, team->t.t_parent);
3464 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3468 while (l->next != NULL && l->entry != team) {
3469 l = l->next;
3471 if (l->next != NULL) {
3477 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3478 l = l->next;
3486 l->entry = team;
3487 l->next = item;
3496 __kmp_printf("%2x %p\n", team->t.t_id, team);
3498 __kmp_printf(" - (nil)\n");
3506 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3508 __kmp_printf(" - (nil)\n");
3519 list->entry = NULL;
3520 list->next = NULL;
3522 __kmp_printf("\n------------------------------\nGlobal Thread "
3523 "Table\n------------------------------\n");
3539 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3540 "----------\n");
3547 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3548 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3550 thread->th.th_serial_team);
3551 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3553 thread->th.th_team_master);
3554 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3555 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3556 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3558 thread->th.th_next_pool);
3560 __kmp_print_structure_team_accum(list, thread->th.th_team);
3561 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3569 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3570 "--------\n");
3577 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3578 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3580 root->r.r_uber_thread);
3581 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3583 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3585 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3586 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3593 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3594 "--------\n");
3595 while (list->next != NULL) {
3596 kmp_team_p const *team = list->entry;
3598 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3599 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3600 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3601 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3602 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3603 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3604 for (i = 0; i < team->t.t_nproc; ++i) {
3606 __kmp_print_structure_thread("", team->t.t_threads[i]);
3608 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3610 list = list->next;
3614 __kmp_printf("\n------------------------------\nPools\n----------------------"
3615 "--------\n");
3625 list = list->next;
3632 //---------------------------------------------------------------------------
3633 // Stuff for per-thread fast random number generator
3648 //---------------------------------------------------------------------------
3651 unsigned x = thread->th.th_x;
3654 thread->th.th_x = x * thread->th.th_a + 1;
3657 thread->th.th_info.ds.ds_tid, r));
3661 //--------------------------------------------------------
3664 unsigned seed = thread->th.th_info.ds.ds_tid;
3666 thread->th.th_a =
3668 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3670 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3683 ->r.r_active) { // AC: reclaim only roots died in non-active state
3726 nNeed -= added;
3750 // instead of having it performed here. -BB
3755 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3756 /* possible expansion too small -- give up */
3778 node->threads = __kmp_threads;
3779 node->next = __kmp_old_threads_list;
3784 added += newCapacity - __kmp_threads_capacity;
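/* Sketch (hypothetical names and types) of the expansion pattern above: grow
   the table by allocating a larger array, copying the old entries, publishing
   the new pointer, and chaining the old array onto a keep-alive list instead
   of freeing it, since racing readers may still hold the old pointer. */
#include <cstring>

struct old_table_node_sketch {
  void **threads;
  old_table_node_sketch *next;
};

static void **g_table_sketch = nullptr;
static int g_capacity_sketch = 0;
static old_table_node_sketch *g_old_list_sketch = nullptr;

static void expand_table_sketch(int new_capacity) {
  void **bigger = new void *[new_capacity](); // zero-initialized
  if (g_table_sketch)
    std::memcpy(bigger, g_table_sketch, sizeof(void *) * g_capacity_sketch);
  // Keep the old storage reachable rather than freeing it immediately.
  g_old_list_sketch =
      new old_table_node_sketch{g_table_sketch, g_old_list_sketch};
  g_table_sketch = bigger; // real code publishes under lock / with fences
  g_capacity_sketch = new_capacity;
}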
3812 /* 2007-03-02:
3815 work as expected -- it may return false (that means there is at least one
3828 --capacity;
3835 capacity -= __kmp_hidden_helper_threads_num;
3913 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3918 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3919 __kmp_stats_thread_ptr->startLife();
3926 if (root->r.r_uber_thread) {
3927 root_thread = root->r.r_uber_thread;
3933 root_thread->th.th_info.ds.ds_gtid = gtid;
3935 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3937 root_thread->th.th_root = root;
3939 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3946 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3953 if (!root_thread->th.th_serial_team) {
3956 root_thread->th.th_serial_team = __kmp_allocate_team(
3963 KMP_ASSERT(root_thread->th.th_serial_team);
3965 root_thread->th.th_serial_team));
3970 root->r.r_root_team->t.t_threads[0] = root_thread;
3971 root->r.r_hot_team->t.t_threads[0] = root_thread;
3972 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3974 root_thread->th.th_serial_team->t.t_serialized = 0;
3975 root->r.r_uber_thread = root_thread;
3978 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3996 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3997 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
4002 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4004 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4008 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4012 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4013 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4014 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4015 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4017 root_thread->th.th_def_allocator = __kmp_def_allocator;
4018 root_thread->th.th_prev_level = 0;
4019 root_thread->th.th_prev_num_threads = 1;
4022 tmp->cg_root = root_thread;
4023 tmp->cg_thread_limit = __kmp_cg_max_nth;
4024 tmp->cg_nthreads = 1;
4028 tmp->up = NULL;
4029 root_thread->th.th_cg_roots = tmp;
4071 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4078 n = nth - 1; // primary thread is not freed
4079 if (level < max_level - 1) {
4081 kmp_info_t *th = team->t.t_threads[i];
4083 if (i > 0 && th->th.th_hot_teams) {
4084 __kmp_free(th->th.th_hot_teams);
4085 th->th.th_hot_teams = NULL;
4097 kmp_team_t *root_team = root->r.r_root_team;
4098 kmp_team_t *hot_team = root->r.r_hot_team;
4099 int n = hot_team->t.t_nproc;
4102 KMP_DEBUG_ASSERT(!root->r.r_active);
4104 root->r.r_root_team = NULL;
4105 root->r.r_hot_team = NULL;
4112 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4113 kmp_info_t *th = hot_team->t.t_threads[i];
4117 if (th->th.th_hot_teams) {
4118 __kmp_free(th->th.th_hot_teams);
4119 th->th.th_hot_teams = NULL;
4138 (LPVOID) & (root->r.r_uber_thread->th),
4139 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4140 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4159 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4164 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4165 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4168 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4169 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4172 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4173 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4174 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4175 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4176 root->r.r_uber_thread->th.th_cg_roots = NULL;
4178 __kmp_reap_thread(root->r.r_uber_thread, 1);
4182 root->r.r_uber_thread = NULL;
4184 root->r.r_begin = FALSE;
4206 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4207 KMP_ASSERT(root->r.r_active == FALSE);
4212 kmp_team_t *team = thread->th.th_team;
4213 kmp_task_team_t *task_team = thread->th.th_task_team;
4216 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4217 task_team->tt.tt_hidden_helper_task_encountered)) {
4220 thread->th.ompt_thread_info.state = ompt_state_undefined;
4245 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4246 KMP_ASSERT(root->r.r_active == FALSE);
4261 kmp_team_t *steam = this_thr->th.th_serial_team;
4262 kmp_team_t *team = this_thr->th.th_team;
4267 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4268 team->t.t_implicit_task_taskdata[tid].td_parent);
4277 /* this_thr->th.th_info.ds.ds_gtid is setup in
4279 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4281 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4283 KMP_DEBUG_ASSERT(team->t.t_threads);
4284 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4285 kmp_info_t *master = team->t.t_threads[0];
4287 KMP_DEBUG_ASSERT(master->th.th_root);
4291 TCW_SYNC_PTR(this_thr->th.th_team, team);
4293 this_thr->th.th_info.ds.ds_tid = tid;
4294 this_thr->th.th_set_nproc = 0;
4298 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4299 else // no tasking --> always safe to reap
4300 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4301 this_thr->th.th_set_proc_bind = proc_bind_default;
4304 this_thr->th.th_new_place = this_thr->th.th_current_place;
4306 this_thr->th.th_root = master->th.th_root;
4309 this_thr->th.th_team_nproc = team->t.t_nproc;
4310 this_thr->th.th_team_master = master;
4311 this_thr->th.th_team_serialized = team->t.t_serialized;
4313 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4316 tid, gtid, this_thr, this_thr->th.th_current_task));
4318 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4322 tid, gtid, this_thr, this_thr->th.th_current_task));
4323 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4327 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4329 this_thr->th.th_local.this_construct = 0;
4331 if (!this_thr->th.th_pri_common) {
4332 this_thr->th.th_pri_common =
4336 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4339 this_thr->th.th_pri_head = NULL;
4343 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4345 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4346 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4349 int i = tmp->cg_nthreads--;
4352 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4354 __kmp_free(tmp); // last thread left CG --> free it
4357 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4359 this_thr->th.th_cg_roots->cg_nthreads++;
4362 this_thr, this_thr->th.th_cg_roots,
4363 this_thr->th.th_cg_roots->cg_root,
4364 this_thr->th.th_cg_roots->cg_nthreads));
4365 this_thr->th.th_current_task->td_icvs.thread_limit =
4366 this_thr->th.th_cg_roots->cg_thread_limit;
4371 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4375 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4377 team->t.t_max_nproc));
4379 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4380 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4382 dispatch->th_disp_index = 0;
4383 dispatch->th_doacross_buf_idx = 0;
4384 if (!dispatch->th_disp_buffer) {
4385 dispatch->th_disp_buffer =
4390 gtid, &dispatch->th_disp_buffer[0],
4391 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4397 gtid, team->t.t_id, gtid);
4400 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4403 dispatch->th_dispatch_pr_current = 0;
4404 dispatch->th_dispatch_sh_current = 0;
4406 dispatch->th_deo_fcn = 0; /* ORDERED */
4407 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4410 this_thr->th.th_next_pool = NULL;
4412 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4413 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4441 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4445 TCW_4(new_thr->th.th_in_pool, FALSE);
4448 if (new_thr->th.th_active_in_pool == TRUE) {
4449 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4451 new_thr->th.th_active_in_pool = FALSE;
4456 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4457 KMP_ASSERT(!new_thr->th.th_team);
4462 new_thr->th.th_info.ds.ds_gtid);
4463 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4467 new_thr->th.th_task_state = 0;
4471 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4489 kmp_balign_t *balign = new_thr->th.th_bar;
4495 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4555 new_thr->th.th_nt_strict = false;
4556 new_thr->th.th_nt_loc = NULL;
4557 new_thr->th.th_nt_sev = severity_fatal;
4558 new_thr->th.th_nt_msg = NULL;
4567 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4570 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4574 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4578 &new_thr->th.th_suspend_init_count,
4579 sizeof(new_thr->th.th_suspend_init_count));
4584 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4585 sizeof(new_thr->th.th_bar[0].bb.b_go));
4588 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4589 sizeof(new_thr->th.th_bar[1].bb.b_go));
4592 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4593 sizeof(new_thr->th.th_bar[2].bb.b_go));
4603 new_thr->th.th_serial_team = serial_team =
4612 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4614 serial_team->t.t_threads[0] = new_thr;
4627 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4639 kmp_balign_t *balign = new_thr->th.th_bar;
4647 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4648 new_thr->th.th_sleep_loc_type = flag_unset;
4650 new_thr->th.th_spin_here = FALSE;
4651 new_thr->th.th_next_waiting = 0;
4653 new_thr->th.th_blocking = false;
4657 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4658 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4659 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4660 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4662 new_thr->th.th_def_allocator = __kmp_def_allocator;
4663 new_thr->th.th_prev_level = 0;
4664 new_thr->th.th_prev_num_threads = 1;
4666 TCW_4(new_thr->th.th_in_pool, FALSE);
4667 new_thr->th.th_active_in_pool = FALSE;
4668 TCW_4(new_thr->th.th_active, TRUE);
4670 new_thr->th.th_set_nested_nth = NULL;
4671 new_thr->th.th_set_nested_nth_sz = 0;
4723 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4728 team->t.t_threads[0], team));
4730 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4731 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4733 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4735 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4736 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4739 team->t.t_threads[0], team));
4752 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4753 KMP_DEBUG_ASSERT(team->t.t_threads);
4756 team->t.t_master_tid = 0; /* not needed */
4757 /* team->t.t_master_bar; not needed */
4758 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4759 team->t.t_nproc = new_nproc;
4761 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4762 team->t.t_next_pool = NULL;
4763 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4766 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4767 team->t.t_invoke = NULL; /* not needed */
4769 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4770 team->t.t_sched.sched = new_icvs->sched.sched;
4773 team->t.t_fp_control_saved = FALSE; /* not needed */
4774 team->t.t_x87_fpu_control_word = 0; /* not needed */
4775 team->t.t_mxcsr = 0; /* not needed */
4778 team->t.t_construct = 0;
4780 team->t.t_ordered.dt.t_value = 0;
4781 team->t.t_master_active = FALSE;
4784 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
4787 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4790 team->t.t_control_stack_top = NULL;
4801 th->th.th_first_place = first;
4802 th->th.th_last_place = last;
4803 th->th.th_new_place = newp;
4804 if (newp != th->th.th_current_place) {
4805 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4806 team->t.t_display_affinity = 1;
4808 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4809 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
4822 kmp_info_t *master_th = team->t.t_threads[0];
4824 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4825 int first_place = master_th->th.th_first_place;
4826 int last_place = master_th->th.th_last_place;
4827 int masters_place = master_th->th.th_current_place;
4829 team->t.t_first_place = first_place;
4830 team->t.t_last_place = last_place;
4834 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4835 team->t.t_id, masters_place, first_place, last_place));
4841 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4842 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4847 int n_th = team->t.t_nproc;
4849 kmp_info_t *th = team->t.t_threads[f];
4855 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4862 int n_th = team->t.t_nproc;
4865 n_places = last_place - first_place + 1;
4867 n_places = num_masks - first_place + last_place + 1;
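/* Worked example (illustrative numbers) of the wrap-around case above: with
   num_masks = 8 places and a partition running from first_place = 6 to
   last_place = 1, the span wraps past the end, so
   n_places = 8 - 6 + 1 + 1 = 4, covering places 6, 7, 0, 1. */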
4872 kmp_info_t *th = team->t.t_threads[f];
4877 } else if (place == (num_masks - 1)) {
4886 __kmp_gtid_from_thread(team->t.t_threads[f]),
4887 team->t.t_id, f, place, first_place, last_place));
4893 rem = n_th - (S * n_places);
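/* Worked example (illustrative numbers) of the block partition above: with
   n_th = 10 threads over n_places = 4 places, S = 10 / 4 = 2 and
   rem = 10 - 2 * 4 = 2, so two of the four places end up with S + 1 = 3
   threads and the other two with S = 2 (3 + 3 + 2 + 2 = 10). */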
4898 kmp_info_t *th = team->t.t_threads[f];
4910 } else if (place == (num_masks - 1)) {
4917 rem--;
4921 } else if (place == (num_masks - 1)) {
4933 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4934 th->th.th_new_place, first_place, last_place));
4942 int n_th = team->t.t_nproc;
4946 n_places = last_place - first_place + 1;
4948 n_places = num_masks - first_place + last_place + 1;
4951 int place = -1;
4958 rem = n_places - n_th * S;
4965 kmp_info_t *th = team->t.t_threads[f];
4973 } else if (place == (num_masks - 1)) {
4983 } else if (place == (num_masks - 1)) {
4988 rem--;
4996 } else if (place == (num_masks - 1)) {
5005 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5006 f, th->th.th_new_place, th->th.th_first_place,
5007 th->th.th_last_place, num_masks));
5024 last = static_cast<int>(current + spacing) - 1;
5028 first -= n_places;
5029 last -= n_places;
5032 first--;
5035 KMP_DEBUG_ASSERT(f == (n_th - 1));
5036 last--;
5045 last = (n_places - 1);
5055 th = team->t.t_threads[f];
5061 __kmp_gtid_from_thread(team->t.t_threads[f]),
5062 team->t.t_id, f, th->th.th_new_place,
5063 th->th.th_first_place, th->th.th_last_place, spacing));
5072 rem = n_th - (S * n_places);
5080 kmp_info_t *th = team->t.t_threads[f];
5092 } else if (place == (num_masks - 1)) {
5099 rem--;
5103 } else if (place == (num_masks - 1)) {
5114 __kmp_gtid_from_thread(team->t.t_threads[f]),
5115 team->t.t_id, f, th->th.th_new_place,
5116 th->th.th_first_place, th->th.th_last_place));
5126 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5144 int use_hot_team = !root->r.r_active;
5156 team = master->th.th_team;
5157 level = team->t.t_active_level;
5158 if (master->th.th_teams_microtask) { // in teams construct?
5159 if (master->th.th_teams_size.nteams > 1 &&
5161 team->t.t_pkfn ==
5163 master->th.th_teams_level <
5164 team->t.t_level)) { // or nested parallel inside the teams
5170 if ((master->th.th_teams_size.nteams == 1 &&
5171 master->th.th_teams_level >= team->t.t_level) ||
5172 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5175 hot_teams = master->th.th_hot_teams;
5194 team = root->r.r_hot_team;
5200 team->t.t_task_team[0], team->t.t_task_team[1]));
5204 if (team->t.t_nproc != new_nproc &&
5207 int old_nthr = team->t.t_nproc;
5214 team->t.t_proc_bind = proc_bind_default;
5218 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5222 if (team->t.t_size_changed == -1) {
5223 team->t.t_size_changed = 1;
5225 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5228 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5229 kmp_r_sched_t new_sched = new_icvs->sched;
5230 // set primary thread's schedule as new run-time schedule
5231 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5234 root->r.r_uber_thread->th.th_ident);
5237 team->t.t_threads[0], team));
5238 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5241 if ((team->t.t_size_changed == 0) &&
5242 (team->t.t_proc_bind == new_proc_bind)) {
5251 team->t.t_id, new_proc_bind, team->t.t_first_place,
5252 team->t.t_last_place));
5255 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5260 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5262 } else if (team->t.t_nproc > new_nproc) {
5267 team->t.t_size_changed = 1;
5276 for (f = new_nproc; f < team->t.t_nproc; f++) {
5277 kmp_info_t *th = team->t.t_threads[f];
5279 th->th.th_task_team = NULL;
5286 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5290 for (f = new_nproc; f < team->t.t_nproc; f++) {
5291 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5292 __kmp_free_thread(team->t.t_threads[f]);
5293 team->t.t_threads[f] = NULL;
5300 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5301 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5302 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5312 team->t.t_nproc = new_nproc;
5313 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5314 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5316 root->r.r_uber_thread->th.th_ident);
5320 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5326 team->t.t_threads[0], team));
5328 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5331 for (f = 0; f < team->t.t_nproc; f++) {
5332 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5333 team->t.t_threads[f]->th.th_team_nproc ==
5334 team->t.t_nproc);
5339 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5344 } else { // team->t.t_nproc < new_nproc
5349 int old_nproc = team->t.t_nproc; // save old value and use to update only
5350 team->t.t_size_changed = 1;
5356 kmp_info_t **other_threads = team->t.t_threads;
5357 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5361 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5363 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5366 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5374 team->t.t_nproc = new_nproc; // just get reserved threads involved
5378 team->t.t_nproc = hot_teams[level].hot_team_nth;
5381 if (team->t.t_max_nproc < new_nproc) {
5398 for (f = team->t.t_nproc; f < new_nproc; f++) {
5401 team->t.t_threads[f] = new_worker;
5406 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5407 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5408 team->t.t_bar[bs_plain_barrier].b_arrived));
5412 kmp_balign_t *balign = new_worker->th.th_bar;
5414 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5418 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5440 root->r.r_uber_thread->th.th_ident);
5443 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5444 for (f = 0; f < team->t.t_nproc; ++f)
5445 __kmp_initialize_info(team->t.t_threads[f], team, f,
5449 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5450 for (f = old_nproc; f < team->t.t_nproc; ++f)
5451 team->t.t_threads[f]->th.th_task_state = old_state;
5454 for (f = 0; f < team->t.t_nproc; ++f) {
5455 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5456 team->t.t_threads[f]->th.th_team_nproc ==
5457 team->t.t_nproc);
5462 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5469 if (master->th.th_teams_microtask) {
5472 kmp_info_t *thr = team->t.t_threads[f];
5473 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5474 thr->th.th_teams_level = master->th.th_teams_level;
5475 thr->th.th_teams_size = master->th.th_teams_size;
5483 kmp_info_t *thr = team->t.t_threads[f];
5485 kmp_balign_t *balign = thr->th.th_bar;
5487 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5490 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5499 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5500 // The hot team re-uses the previous task team,
5501 // if untouched during the previous release->gather phase.
5509 team->t.t_task_team[0], team->t.t_task_team[1]));
5527 if (team->t.t_max_nproc >= max_nproc) {
5529 __kmp_team_pool = team->t.t_next_pool;
5533 if (!team->t.b) { // Allocate barrier structure
5534 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5543 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5544 team->t.t_task_team[0] = NULL;
5545 team->t.t_task_team[1] = NULL;
5549 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5553 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5557 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5559 team->t.t_bar[b].b_master_arrived = 0;
5560 team->t.t_bar[b].b_team_arrived = 0;
5565 team->t.t_proc_bind = new_proc_bind;
5568 team->t.t_id));
5574 team->t.t_nested_nth = NULL;
5582 // not sure if this is wise, but it will be redone during the hot-teams
5584 /* TODO: Use technique to find the right size hot-team, don't reap them */
5594 team->t.t_max_nproc = max_nproc;
5598 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5610 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5611 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5613 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5617 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5622 team->t.t_argc = argc;
5626 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5630 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5632 team->t.t_bar[b].b_master_arrived = 0;
5633 team->t.t_bar[b].b_team_arrived = 0;
5638 team->t.t_proc_bind = new_proc_bind;
5642 team->t.ompt_serialized_team_info = NULL;
5647 team->t.t_nested_nth = NULL;
5650 team->t.t_id));
5655 /* TODO implement hot-teams at all levels */
5664 team->t.t_id));
5669 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5670 KMP_DEBUG_ASSERT(team->t.t_threads);
5672 int use_hot_team = team == root->r.r_hot_team;
5676 level = team->t.t_active_level - 1;
5677 if (master->th.th_teams_microtask) { // in teams construct?
5678 if (master->th.th_teams_size.nteams > 1) {
5682 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5683 master->th.th_teams_level == team->t.t_level) {
5686 } // team->t.t_level will be increased inside parallel
5689 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5699 TCW_SYNC_PTR(team->t.t_pkfn,
5702 team->t.t_copyin_counter = 0; // init counter for possible reuse
5706 /* if we are non-hot team, release our threads */
5710 for (f = 1; f < team->t.t_nproc; ++f) {
5711 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5712 kmp_info_t *th = team->t.t_threads[f];
5713 volatile kmp_uint32 *state = &th->th.th_reap_state;
5724 if (th->th.th_sleep_loc)
5733 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5735 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5736 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5737 team->t.t_threads[f]->th.th_task_team = NULL;
5742 __kmp_get_gtid(), task_team, team->t.t_id));
5746 team->t.t_task_team[tt_idx] = NULL;
5752 if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
5753 team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
5754 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
5755 KMP_INTERNAL_FREE(team->t.t_nested_nth);
5757 team->t.t_nested_nth = NULL;
5759 // Reset pointer to parent team only for non-hot teams.
5760 team->t.t_parent = NULL;
5761 team->t.t_level = 0;
5762 team->t.t_active_level = 0;
5765 for (f = 1; f < team->t.t_nproc; ++f) {
5766 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5768 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5771 __kmp_free_thread(team->t.t_threads[f]);
5775 if (team->t.b) {
5777 team->t.b->go_release();
5779 for (f = 1; f < team->t.t_nproc; ++f) {
5780 if (team->t.b->sleep[f].sleep) {
5782 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5788 for (int f = 1; f < team->t.t_nproc; ++f) {
5789 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5795 for (f = 1; f < team->t.t_nproc; ++f) {
5796 team->t.t_threads[f] = NULL;
5799 if (team->t.t_max_nproc > 1 &&
5801 distributedBarrier::deallocate(team->t.b);
5802 team->t.b = NULL;
5806 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5810 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5811 team->t.t_threads[1]->th.th_cg_roots);
5812 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5813 // Clean up the CG root nodes on workers so that this team can be re-used
5814 for (f = 1; f < team->t.t_nproc; ++f) {
5815 kmp_info_t *thr = team->t.t_threads[f];
5816 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5817 thr->th.th_cg_roots->cg_root == thr);
5819 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5820 thr->th.th_cg_roots = tmp->up;
5823 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5824 int i = tmp->cg_nthreads--;
5829 if (thr->th.th_cg_roots)
5830 thr->th.th_current_task->td_icvs.thread_limit =
5831 thr->th.th_cg_roots->cg_thread_limit;
5841 kmp_team_t *next_pool = team->t.t_next_pool;
5844 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5845 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5846 KMP_DEBUG_ASSERT(team->t.t_threads);
5847 KMP_DEBUG_ASSERT(team->t.t_argv);
5853 if (team->t.t_argv != &team->t.t_inline_argv[0])
5854 __kmp_free((void *)team->t.t_argv);
5864 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5872 // With single-level parallelism, threads will always be added to the tail
5885 // Now, for single-level parallelism, the OMP tid is always == gtid.
5891 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5898 kmp_balign_t *balign = this_th->th.th_bar;
5905 this_th->th.th_task_state = 0;
5906 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5909 TCW_PTR(this_th->th.th_team, NULL);
5910 TCW_PTR(this_th->th.th_root, NULL);
5911 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5913 while (this_th->th.th_cg_roots) {
5914 this_th->th.th_cg_roots->cg_nthreads--;
5917 this_th, this_th->th.th_cg_roots,
5918 this_th->th.th_cg_roots->cg_root,
5919 this_th->th.th_cg_roots->cg_nthreads));
5920 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5921 if (tmp->cg_root == this_th) { // Thread is a cg_root
5922 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5925 this_th->th.th_cg_roots = tmp->up;
5928 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5931 this_th->th.th_cg_roots = NULL;
5937 * -> multiple threads can share the data and try to free the task at
5942 this_th->th.th_current_task = NULL;
5945 // point, then we need to re-scan the entire list.
5946 gtid = this_th->th.th_info.ds.ds_gtid;
5949 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5960 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5964 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5965 scan = &((*scan)->th.th_next_pool))
5970 TCW_PTR(this_th->th.th_next_pool, *scan);
5972 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5973 (this_th->th.th_info.ds.ds_gtid <
5974 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5975 TCW_4(this_th->th.th_in_pool, TRUE);
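/* Sketch of the ordering invariant maintained above (node_t / insert_sorted
   are illustrative names, not libomp's): released threads sit in a singly
   linked pool kept sorted by ascending gtid, so the scan stops at the first
   entry with a larger gtid and splices the freed thread in just before it. */
#include <stddef.h>

typedef struct node { int gtid; struct node *next; } node_t;

static void insert_sorted(node_t **head, node_t *n) {
  node_t **scan = head;
  while (*scan != NULL && (*scan)->gtid < n->gtid)
    scan = &(*scan)->next; // advance to the first gtid >= n->gtid
  n->next = *scan;         // splice in, keeping the pool sorted
  *scan = n;
}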
5978 if (this_th->th.th_active == TRUE) {
5980 this_th->th.th_active_in_pool = TRUE;
5984 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5989 TCW_4(__kmp_nth, __kmp_nth - 1);
6005 /* ------------------------------------------------------------------------ */
6015 int gtid = this_thr->th.th_info.ds.ds_gtid;
6023 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
6034 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6037 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6038 this_thr->th.ompt_thread_info.wait_id = 0;
6039 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6040 this_thr->th.ompt_thread_info.parallel_flags = 0;
6045 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6062 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6066 pteam = &this_thr->th.th_team;
6071 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6075 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6076 (*pteam)->t.t_pkfn));
6082 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6086 rc = (*pteam)->t.t_invoke(gtid);
6091 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6092 (*pteam)->t.t_pkfn));
6097 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6099 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6118 this_thr->th.th_task_team = NULL;
6131 /* ------------------------------------------------------------------------ */
6136 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6139 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
6140 * this is because 0 is reserved for the nothing-stored case */
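/* Standalone illustration of the +1 trick described above (assumes POSIX TLS;
   gtid_key / set_gtid / get_gtid are illustrative names, not libomp's):
   pthread_getspecific() yields NULL when nothing was stored, so a stored
   gtid of 0 would look like "unset".  Storing gtid+1 keeps 0 free to mean
   "no gtid recorded". */
#include <pthread.h>
#include <stdint.h>

static pthread_key_t gtid_key; // assume pthread_key_create() ran at init time

static void set_gtid(int gtid) {
  pthread_setspecific(gtid_key, (void *)(intptr_t)(gtid + 1));
}

static int get_gtid(void) { // returns -1 when no gtid was stored
  return (int)(intptr_t)pthread_getspecific(gtid_key) - 1;
}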
6180 __kmp_internal_end_library(-1);
6193 gtid = thread->th.th_info.ds.ds_gtid;
6203 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6209 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6221 // There is a small timing hole here - if the worker thread was just waking
6228 if (thread->th.th_active_in_pool) {
6229 thread->th.th_active_in_pool = FALSE;
6247 --__kmp_all_nth;
6263 if (thread->th.th_cons) {
6264 __kmp_free_cons_stack(thread->th.th_cons);
6265 thread->th.th_cons = NULL;
6269 if (thread->th.th_pri_common != NULL) {
6270 __kmp_free(thread->th.th_pri_common);
6271 thread->th.th_pri_common = NULL;
6275 if (thread->th.th_local.bget_data != NULL) {
6281 if (thread->th.th_affin_mask != NULL) {
6282 KMP_CPU_FREE(thread->th.th_affin_mask);
6283 thread->th.th_affin_mask = NULL;
6288 if (thread->th.th_hier_bar_data != NULL) {
6289 __kmp_free(thread->th.th_hier_bar_data);
6290 thread->th.th_hier_bar_data = NULL;
6294 __kmp_reap_team(thread->th.th_serial_team);
6295 thread->th.th_serial_team = NULL;
6308 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6318 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6344 if (__kmp_root[i]->r.r_active)
6351 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6377 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
6389 __kmp_thread_pool = thread->th.th_next_pool;
6391 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6392 thread->th.th_next_pool = NULL;
6393 thread->th.th_in_pool = FALSE;
6402 __kmp_team_pool = team->t.t_next_pool;
6404 team->t.t_next_pool = NULL;
6413 // until all threads either exit the final spin-waiting loop or begin
6417 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6462 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6504 if (__kmp_root[gtid]->r.r_active) {
6505 __kmp_global.g.g_abort = -1;
6585 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6629 if (__kmp_root[gtid]->r.r_active) {
6630 __kmp_global.g.g_abort = -1;
6646 __kmp_threads[gtid]->th.th_task_team = NULL;
6685 /* should we finish the run-time? are all siblings done? */
6714 // -----------------------------------------------------------------------------
6757 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6772 int fd1 = -1;
6776 if ((fd1 == -1) && (errno == EEXIST)) {
6780 if (fd1 == -1) { // file didn't open
6788 if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size;
6809 if (fd1 != -1)
6820 int fd1 = -1;
6824 if ((fd1 == -1) && (errno == EEXIST)) {
6828 if (fd1 == -1) { // file didn't open
6837 if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size;
6858 if (fd1 != -1)
6862 // no /dev/shm and no /tmp -- fall back to environment variable
6880 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6885 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6886 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6894 // First, check whether environment-encoded address is mapped into
6907 case 0: // Cannot parse environment variable -- neighbor status unknown.
6968 if (fd1 != -1) { // File opened successfully
6978 if (fd1 != -1) { // File opened successfully
7027 // -----------------------------------------------------------------------------
7036 if ((cs_p->eax & 0xff0) == 0xB10) {
7038 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
7051 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
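/* Standalone version of the same feature test (assumes GCC/Clang <cpuid.h>,
   x86 only; not libomp code): CPUID leaf 7, subleaf 0 reports the WAITPKG
   feature (umonitor/umwait/tpause) in ECX bit 5, which is exactly what the
   "(buf.ecx >> 5) & 1" above extracts. */
#include <cpuid.h>
#include <stdio.h>

int main(void) {
  unsigned eax, ebx, ecx, edx;
  int waitpkg = 0;
  if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    waitpkg = (ecx >> 5) & 1; // CPUID.(EAX=7,ECX=0):ECX[5] == WAITPKG
  printf("user-level mwait (WAITPKG): %s\n", waitpkg ? "yes" : "no");
  return 0;
}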
7059 // Spurious, non-existent value that should always fail to return anything.
7071 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
7211 // AC: do not use analytical here, because it is non-monotonic
7301 // case, set them to NULL -- some memory may leak, but subsequent code will
7313 /* Since allocation is cache-aligned, just add extra padding at the end */
7483 // already explicitly set its nthreads-var with a call to
7489 if (thread->th.th_current_task->td_icvs.nproc != 0)
7645 /* ------------------------------------------------------------------------ */
7654 this_thr->th.th_local.this_construct = 0;
7656 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7658 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7660 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7661 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7662 // this_thr->th.th_info.ds.ds_tid ] );
7664 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7665 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7667 __kmp_push_parallel(gtid, team->t.t_ident);
7675 __kmp_pop_parallel(gtid, team->t.t_ident);
7684 kmp_team_t *team = this_thr->th.th_team;
7690 if (team->t.t_stack_id != NULL) {
7691 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7693 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7695 (__itt_caller)team->t.t_parent->t.t_stack_id);
7711 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7718 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7719 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7721 ompt_team_size = team->t.t_nproc;
7725 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7739 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7740 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7748 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
7761 if (team->t.t_stack_id != NULL) {
7762 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7764 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7766 (__itt_caller)team->t.t_parent->t.t_stack_id);
7778 kmp_team_t *team = thr->th.th_team;
7779 ident_t *loc = team->t.t_ident;
7780 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7781 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7782 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7784 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7788 tmp->cg_root = thr; // Make thr the CG root
7790 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7791 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7795 tmp->up = thr->th.th_cg_roots;
7796 thr->th.th_cg_roots = tmp;
7803 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7804 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7810 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7811 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7825 kmp_team_t *team = this_thr->th.th_team;
7827 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7828 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7835 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7836 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7839 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7841 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7846 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
7860 thr->th.th_set_nproc = num_threads;
7870 thr->th.th_set_nproc = num_threads_list[0];
7871 thr->th.th_set_nested_nth =
7874 thr->th.th_set_nested_nth[i] = num_threads_list[i];
7875 thr->th.th_set_nested_nth_sz = list_length;
7881 thr->th.th_nt_strict = true;
7882 thr->th.th_nt_loc = loc;
7885 thr->th.th_nt_sev = sev;
7887 thr->th.th_nt_sev = severity_fatal;
7890 thr->th.th_nt_msg = msg;
7892 thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
7913 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7914 // no thread_limit clause specified - do not change thread-limit-var ICV
7916 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7918 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7919 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7920 } // prevent team size from exceeding thread-limit-var
7935 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7936 // num_threads = min(num_threads, nthreads-var)
7938 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7956 thr->th.th_teams_size.nth = num_threads;
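/* Sketch of the clamping rule the comments above spell out (illustrative
   helper, not libomp's): the effective team size is
     num_threads = min(requested, nthreads-var, thread-limit-var),
   where a thread_limit clause first overwrites thread-limit-var and an
   unspecified num_threads falls back to nthreads-var. */
static int effective_num_threads(int requested, int nthreads_var,
                                 int thread_limit_var) {
  int n = (requested > 0) ? requested : nthreads_var; // 0 => honor nthreads-var
  if (n > thread_limit_var)
    n = thread_limit_var; // never exceed thread-limit-var
  return n;
}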
7989 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8046 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8054 thr->th.th_set_proc_bind = proc_bind;
8067 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8071 team->t.t_construct = 0; /* no single directives seen yet */
8072 team->t.t_ordered.dt.t_value =
8076 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8077 if (team->t.t_max_nproc > 1) {
8080 team->t.t_disp_buffer[i].buffer_index = i;
8081 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8084 team->t.t_disp_buffer[0].buffer_index = 0;
8085 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8089 KMP_ASSERT(this_thr->th.th_team == team);
8092 for (f = 0; f < team->t.t_nproc; f++) {
8093 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8094 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8106 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8114 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8117 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8118 "team->t.t_nproc=%d\n",
8119 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8120 team->t.t_nproc);
8124 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8129 ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
8133 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8135 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8141 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8144 if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
8164 KMP_ASSERT(this_thr->th.th_team == team);
8167 /* ------------------------------------------------------------------------ */
8178 if (root->r.r_active) {
8181 hot_team = root->r.r_hot_team;
8183 return hot_team->t.t_nproc - 1; // Don't count primary thread
8186 // Skip the primary thread - it is accounted for elsewhere.
8188 for (i = 1; i < hot_team->t.t_nproc; i++) {
8189 if (hot_team->t.t_threads[i]->th.th_active) {
8208 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8209 ->th.th_current_task->td_icvs.dynamic == TRUE);
8240 retval = __kmp_avail_proc - __kmp_nth +
8241 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8260 retval = __kmp_avail_proc - system_active + team_curr_active;
8274 /* ------------------------------------------------------------------------ */
8322 kmp_old_threads_list_t *next = ptr->next;
8323 __kmp_free(ptr->threads);
8378 /* ------------------------------------------------------------------------ */
8387 // By default __kmpc_begin() is a no-op.
8398 // By default __kmpc_end() is a no-op.
8409 root = __kmp_threads[gtid]->th.th_root;
8412 if (root->r.r_begin)
8414 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8415 if (root->r.r_begin) {
8416 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8420 root->r.r_begin = TRUE;
8422 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8425 /* ------------------------------------------------------------------------ */
8437 root = thread->th.th_root;
8441 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8449 thread->th.th_set_nproc = 0;
8453 thread->th.th_set_nproc = 0;
8458 thread->th.th_set_nproc = 0;
8474 if (arg & (0x1000 - 1)) {
8475 arg &= ~(0x1000 - 1);
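/* The masking above truncates the requested stack size to a 4 KiB (0x1000)
   boundary.  Generic form of the trick, assuming the alignment is a power of
   two (helpers are illustrative, not libomp's): */
#include <stddef.h>

static inline size_t round_down_pow2(size_t n, size_t align) {
  return n & ~(align - 1); // clear the low log2(align) bits
}
static inline size_t round_up_pow2(size_t n, size_t align) {
  return (n + align - 1) & ~(align - 1);
}
/* e.g. round_down_pow2(0x12345, 0x1000) == 0x12000,
        round_up_pow2(0x12345, 0x1000)   == 0x13000. */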
8526 if (thr->th.th_teams_microtask) {
8527 kmp_team_t *team = thr->th.th_team;
8528 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8529 int ii = team->t.t_level;
8530 teams_serialized = team->t.t_serialized;
8534 for (teams_serialized = team->t.t_serialized;
8535 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8537 if (team->t.t_serialized && (!teams_serialized)) {
8538 team = team->t.t_parent;
8542 team = team->t.t_parent;
8543 ii--;
8558 return team->t.t_master_tid;
8571 return team->t.t_parent->t.t_nproc;
8577 /* ------------------------------------------------------------------------ */
8593 * L {thread_level} - omp_get_level()
8594 * n {thread_num} - omp_get_thread_num()
8595 * h {host} - name of host machine
8596 * P {process_id} - process id (integer)
8597 * T {thread_identifier} - native thread identifier (integer)
8598 * N {num_threads} - omp_get_num_threads()
8599 * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8600 * a {thread_affinity} - comma separated list of integers or integer ranges
8603 * Implementation-specific field types can be added
8611 char short_name; // from spec e.g., L -> thread level
8612 const char *long_name; // from spec thread_level -> thread level
8682 format[format_index++] = '-';
8743 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8761 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8765 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8772 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8817 // If format is NULL or zero-length string, then we use
8818 // affinity-format-var ICV
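/* Usage sketch of the OpenMP 5.0 affinity display API that this formatting
   code backs (standalone example, not libomp internals).  The format string
   uses only long field names from the table above via the spec's %{...} form;
   passing NULL instead falls back to the affinity-format-var ICV, as noted in
   the preceding comment. */
#include <omp.h>

int main(void) {
  omp_set_affinity_format("host=%{host} pid=%{process_id} "
                          "thread=%{thread_num}/%{num_threads} "
                          "affinity=%{thread_affinity}");
#pragma omp parallel
  omp_display_affinity(NULL); // NULL => use affinity-format-var set above
  return 0;
}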
8852 /* ------------------------------------------------------------------------ */
8868 set__blocktime_team(thread->th.th_team, tid, blocktime);
8869 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8875 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8876 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8882 set__bt_set_team(thread->th.th_team, tid, bt_set);
8883 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8887 __kmp_gtid_from_tid(tid, thread->th.th_team),
8888 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8892 __kmp_gtid_from_tid(tid, thread->th.th_team),
8893 thread->th.th_team->t.t_id, tid, blocktime));
8908 /* ------------------------------------------------------------------------ */
8919 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8921 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8934 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8992 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
9004 (reduce_size < (2000 * sizeof(kmp_real64)))) {
9070 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
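/* Simplified sketch of the selection the comments above describe (the enum
   and thresholds are illustrative; the real choice also depends on
   architecture, OS and team size): a combiner callback enables the tree
   method, the KMP_IDENT_ATOMIC_REDUCE flag enables the atomic method for a
   few small scalars, and a critical section remains the always-valid
   fallback. */
#include <stddef.h>

typedef enum { critical_reduce, atomic_reduce, tree_reduce } reduction_kind_t;

static reduction_kind_t
choose_reduction(int atomic_flag_set, /* loc->flags had KMP_IDENT_ATOMIC_REDUCE */
                 void *reduce_data, void (*reduce_func)(void *, void *),
                 int num_vars, size_t reduce_size) {
  int tree_available = (reduce_data != NULL && reduce_func != NULL);
  if (atomic_flag_set && num_vars <= 2 &&
      reduce_size < 2000 * sizeof(double)) // small payload: atomics win
    return atomic_reduce;
  if (tree_available) // larger payloads with a combiner: tree scales better
    return tree_reduce;
  return critical_reduce; // always-correct fallback
}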
9074 // Spin-wait code checks __kmp_pause_status and reacts accordingly.
9081 __kmp_internal_end_thread(-1);
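/* Usage sketch of the OpenMP 5.0 pause API whose bookkeeping appears here
   (standalone example, not libomp internals): a soft pause allows the runtime
   to release its worker threads; they are re-created on the next parallel
   region.  A return value of 0 means the pause was honored. */
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  { /* warm up the thread pool */ }

  if (omp_pause_resource_all(omp_pause_soft) != 0)
    printf("soft pause not supported\n");

#pragma omp parallel
  { /* workers are spun up again on demand */ }
  return 0;
}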
9092 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9162 kmp_info_t **other_threads = team->t.t_threads;
9169 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9175 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9176 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9180 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9182 team->t.t_threads[f]->th.th_used_in_team.store(2);
9183 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9186 team->t.b->go_release();
9192 int count = old_nthreads - 1;
9194 count = old_nthreads - 1;
9196 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9199 void *, other_threads[f]->th.th_sleep_loc);
9200 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9203 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9204 count--;
9209 team->t.b->update_num_threads(new_nthreads);
9210 team->t.b->go_reset();
9222 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9223 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9226 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9233 int count = new_nthreads - 1;
9235 count = new_nthreads - 1;
9237 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9238 count--;
9288 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9311 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9342 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9345 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9347 loc--;
9351 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9352 int num_cores = __kmp_topology->get_count(core_level);
9354 for (int level = 0; level < loc - 1; ++level)
9356 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9357 __kmp_nesting_nth_level[loc - 1] =
9358 num_cores / __kmp_nesting_nth_level[loc - 2];