/*
 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#if ENABLE_LIBOMPTARGET
static void (*tgt_target_nowait_query)(void **);

void __kmp_init_target_task() {
  *(void **)(&tgt_target_nowait_query) = KMP_DLSYM("__tgt_target_nowait_query");
}
#endif

/* forward declaration */
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#if OMPX_TASKGRAPH
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
int __kmp_taskloop_task(int gtid, void *ptask);
#endif

#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order
// from top to bottom
//
// gtid: global thread identifier for thread containing stack
// thread_data: thread data for task team thread containing stack
// threshold: value above which the trace statement triggers
// location: string identifying call site of this function (for trace)
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
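
// Illustration of the block-chained stack layout used above (this code is only
// built when BUILD_TIED_TASK_STACK is defined, which it normally is not).
// Assuming the kmp.h definitions TASK_STACK_BLOCK_SIZE == 32 and
// TASK_STACK_INDEX_MASK == 31, entry i of the stack lives at index (i & 31)
// of block (i >> 5), and (entries & TASK_STACK_INDEX_MASK) == 0 means the
// current position sits exactly on a block boundary:
//
//   entries = 32 -> 32 & 31 == 0 -> end of block 0; the next push continues
//                                   in block 1 (allocated on demand)
//   entries = 33 -> 33 & 31 == 1 -> slot 0 of block 1
//
// That is why push, pop, and trace all hop to the next/previous sb_block
// whenever the masked entry count is zero.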

// __kmp_init_task_stack: initialize the task stack for the first time
// after a thread_data structure is created.
// It should not be necessary to do this again (assuming the stack works).
//
// gtid: global thread identifier of calling thread
// thread_data: thread data for task team thread containing stack
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}

// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
// gtid: global thread identifier for calling thread
// thread_data: thread data for the thread containing the stack
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_info_t *thread = __kmp_threads[gtid]; // needed to free extra blocks
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}

// __kmp_push_task_stack: Push the tied task onto the task stack.
// Grow the stack if necessary by allocating another block.
//
// gtid: global thread identifier for calling thread
// thread: thread info for thread containing stack
// tied_task: the task to push on the stack
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  // GEH - need to consider what to do if tt_threads_data not allocated yet
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}

// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
// the task, just check to make sure it matches the ending task passed in.
//
// gtid: global thread identifier for the calling thread
// thread: thread info structure containing stack
// tied_task: the task popped off the stack
// ending_task: the task that is ending (should match popped task)
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  // GEH - need to consider what to do if tt_threads_data not allocated yet
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
  return;
}
#endif /* BUILD_TIED_TASK_STACK */

// returns 1 if new task is allowed to execute, 0 otherwise
// checks Task Scheduling constraint (if requested) and
// mutexinoutset dependencies if any
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it in turn is a descendant of all the others
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
#if OMPX_TASKGRAPH
  if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#else
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#endif
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previous locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
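
// A worked example of the TSC check above (task names hypothetical): suppose
// the encountering thread's last deferred tied task C has td_level 2 and a
// candidate task N has td_level 4. The while loop walks N's td_parent chain
// until it reaches C's level, and N may run only if that walk lands on C:
//
//   implicit task (level 1)
//     +-- C (tied, deferred, level 2)   <- taskcurr->td_last_tied
//     |     +-- X (level 3)
//     |           +-- N (level 4)       walk N -> X -> C: allowed
//     +-- M (tied, level 2)             walk stops at level 2, != C: denied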

// __kmp_realloc_task_deque:
// Re-allocates a task deque for a particular thread, copies the content from
// the old deque and adjusts the necessary data structures relating to the
// deque. This operation must be done with the deque_lock being held
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
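
// Worked example of the copy loop above (numbers hypothetical): for a full
// deque with size = 8 and head = tail = 5, the live entries occupy ring
// slots 5,6,7,0,1,2,3,4. They are copied in that order to new_deque[0..7],
// so afterwards head = 0, tail = size = 8, and the doubled deque
// (new_size = 16) has eight free slots starting at the new tail.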

static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &l->td;
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
  return l;
}

// The function finds the deque of priority tasks with the given priority, or
// allocates a new deque and puts it into the sorted (high -> low) list of
// deques. Deques of non-default priority tasks are shared between all threads
// in the team, as opposed to per-thread deques of tasks with default priority.
// The function is called under the lock task_team->tt.tt_task_pri_lock.
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_thread_data_t *thread_data;
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (lst->priority == pri) {
    // Found queue of tasks with given priority.
    thread_data = &lst->td;
  } else if (lst->priority < pri) {
    // All current priority queues contain tasks with lower priority.
    // Allocate new one for given priority tasks.
    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
    thread_data = &list->td;
    list->priority = pri;
    list->next = lst;
    task_team->tt.tt_task_pri_list = list;
  } else { // task_team->tt.tt_task_pri_list->priority > pri
    kmp_task_pri_t *next_queue = lst->next;
    while (next_queue && next_queue->priority > pri) {
      lst = next_queue;
      next_queue = lst->next;
    }
    // lst->priority > pri && (next == NULL || pri >= next->priority)
    if (next_queue == NULL) {
      // No queue with pri priority, need to allocate new one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      lst->next = list;
    } else if (next_queue->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &next_queue->td;
    } else { // lst->priority > pri > next->priority
      // insert newly allocated between existing queues
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = next_queue;
      lst->next = list;
    }
  }
  return thread_data;
}

// __kmp_push_priority_task: Add a task to the team's priority task deque
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find task queue specific to priority value
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    if (task_team->tt.tt_task_pri_list == NULL) {
      // List of queues is still empty, allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Other thread initialized a queue. Check if it fits and get thread_data.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else {
    if (lst->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &lst->td;
    } else {
      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    }
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++; // atomic inc
  return TASK_SUCCESSFULLY_PUSHED;
}
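
// The "wrap index" step above is power-of-two ring-buffer arithmetic:
// TASK_DEQUE_MASK(td) is the deque size minus one, so with a deque of size
// 256 the mask is 0xFF and a tail of 255 advances to (255 + 1) & 0xFF == 0,
// reusing slots vacated at the head. The same masking is used by every deque
// push, pop, and steal in this file.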

// __kmp_push_task: Add a task to the thread's deque
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task, and the current thread is not a
  // hidden helper thread, we have to give the task to any hidden helper thread
  // starting from its shadow one.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
      __kmp_max_task_priority > 0) {
    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
  }

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only the owner can allocate. If the task is
  // hidden_helper, we don't need it either because we have initialized the
  // deque for hidden helper thread data.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}

// __kmp_pop_current_task_from_thread: set up current task from called thread
// when team ends
//
// this_thr: thread structure to set current_task in.
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
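
// Note on __kmp_push_task above: the deque-full test is intentionally done in
// a check/lock/recheck pattern. The unlocked check is the cheap fast path;
// after td_deque_lock is acquired the state must be checked again, because a
// proxy task completed by a thread outside of OpenMP can modify the deque
// concurrently. When throttling applies, returning TASK_NOT_PUSHED makes the
// encountering thread execute the task immediately instead of growing the
// deque.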

// __kmp_push_current_task_to_thread: set up current task in called thread for a
// new team
//
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // the current task of the thread is the parent of the just-created implicit
  // tasks of the new team
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}

// __kmp_task_start: bookkeeping for a task starting execution
//
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  // TODO: GEH - make sure root team implicit task is initialized properly.
  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
  current_task->td_flags.executing = 0;

  // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // GEH TODO: shouldn't we pass some sort of location identifier here?
  // APT: yes, we will pass location here.
  // need to store current thread state (in a thread or taskdata structure)
  // before setting work_state, otherwise wrong state is set after end of task

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));

  return;
}
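
// The asserts in __kmp_task_start document the flag lifecycle maintained in
// this file: a task is allocated with started == executing == complete ==
// freed == 0; __kmp_task_start sets started and executing; completion clears
// executing and sets complete; __kmp_free_task finally sets freed. Untied
// tasks are exempted from the started/executing checks because they can be
// resumed, possibly on another thread, after each scheduling point.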

#if OMPT_SUPPORT
//------------------------------------------------------------------------------
// __ompt_task_init:
// Initialize OMPT fields maintained by a task. This will only be called after
// ompt_start_tool, so we already know whether ompt is enabled or not.

static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // The calls to __ompt_task_init already have the ompt_enabled condition.
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.dispatch_chunk.start = 0;
  task->ompt_task_info.dispatch_chunk.iterations = 0;
}

// __ompt_task_start:
// Build and trigger task-begin event
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  /* let OMPT know that we're about to run this task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}

// __ompt_task_finish:
// Build and trigger final task-schedule event
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }

    /* let OMPT know that we're returning to the callee task */
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
#endif

template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(taskdata), 0, return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
#ifdef __s390x__
// This is required for OMPT_GET_FRAME_ADDRESS(1) to compile on s390x.
// In order for it to work correctly, the caller also needs to be compiled with
// backchain. If a caller is compiled without backchain,
// OMPT_GET_FRAME_ADDRESS(1) will produce an incorrect value, but will not
// crash.
__attribute__((target("backchain")))
#endif
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
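
// Typical compiler-generated usage (a sketch, not literal compiler output):
// for an undeferred construct such as "#pragma omp task if(0)" the compiler
// brackets the immediately executed task body with this entry point and its
// counterpart __kmpc_omp_task_complete_if0:
//
//   kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, flags, sizeof_kmp_task_t,
//                                         sizeof_shareds, task_entry);
//   __kmpc_omp_task_begin_if0(loc, gtid, t);
//   task_entry(gtid, t); // body runs at once on the encountering thread
//   __kmpc_omp_task_complete_if0(loc, gtid, t);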

#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
  return;
}
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
  // Clear data to not be re-used later by mistake.
  task->data1.destructors = NULL;
  task->data2.priority = 0;

  taskdata->td_flags.freed = 1;
#if OMPX_TASKGRAPH
  // do not free tasks in taskgraph
  if (!taskdata->is_taskgraph) {
#endif
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, taskdata);
USE_FAST_MEMORY */ 9140b57cec5SDimitry Andric __kmp_thread_free(thread, taskdata); 9150b57cec5SDimitry Andric #endif 91606c3fb27SDimitry Andric #if OMPX_TASKGRAPH 91706c3fb27SDimitry Andric } else { 91806c3fb27SDimitry Andric taskdata->td_flags.complete = 0; 91906c3fb27SDimitry Andric taskdata->td_flags.started = 0; 92006c3fb27SDimitry Andric taskdata->td_flags.freed = 0; 92106c3fb27SDimitry Andric taskdata->td_flags.executing = 0; 92206c3fb27SDimitry Andric taskdata->td_flags.task_serial = 92306c3fb27SDimitry Andric (taskdata->td_parent->td_flags.final || 92406c3fb27SDimitry Andric taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser); 92506c3fb27SDimitry Andric 92606c3fb27SDimitry Andric // taskdata->td_allow_completion_event.pending_events_count = 1; 92706c3fb27SDimitry Andric KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0); 92806c3fb27SDimitry Andric KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0); 92906c3fb27SDimitry Andric // start at one because counts current task and children 93006c3fb27SDimitry Andric KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1); 93106c3fb27SDimitry Andric } 93206c3fb27SDimitry Andric #endif 93306c3fb27SDimitry Andric 9340b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata)); 9350b57cec5SDimitry Andric } 9360b57cec5SDimitry Andric 9370b57cec5SDimitry Andric // __kmp_free_task_and_ancestors: free the current task and ancestors without 9380b57cec5SDimitry Andric // children 9390b57cec5SDimitry Andric // 9400b57cec5SDimitry Andric // gtid: Global thread ID of calling thread 9410b57cec5SDimitry Andric // taskdata: task to free 9420b57cec5SDimitry Andric // thread: thread data structure of caller 9430b57cec5SDimitry Andric static void __kmp_free_task_and_ancestors(kmp_int32 gtid, 9440b57cec5SDimitry Andric kmp_taskdata_t *taskdata, 9450b57cec5SDimitry Andric kmp_info_t *thread) { 9460b57cec5SDimitry Andric // Proxy tasks must always be allowed to free their parents 9470b57cec5SDimitry Andric // because they can be run in background even in serial mode. 9480b57cec5SDimitry Andric kmp_int32 team_serial = 9490b57cec5SDimitry Andric (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) && 9500b57cec5SDimitry Andric !taskdata->td_flags.proxy; 9510b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); 9520b57cec5SDimitry Andric 9530b57cec5SDimitry Andric kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1; 9540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(children >= 0); 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric // Now, go up the ancestor tree to see if any ancestors can now be freed. 
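// Explanatory note (summary, not from the original sources):
// td_allocated_child_tasks starts at 1 because it counts the task itself as
// well as its children (see __kmp_task_alloc). KMP_ATOMIC_DEC returns the
// value *before* the decrement, so the "- 1" below yields the post-decrement
// count; a count of 0 means neither the task itself nor any child still
// references the taskdata, and it may be freed.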
9570b57cec5SDimitry Andric while (children == 0) {
9580b57cec5SDimitry Andric kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
9590b57cec5SDimitry Andric 
9600b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
9610b57cec5SDimitry Andric "and freeing itself\n",
9620b57cec5SDimitry Andric gtid, taskdata));
9630b57cec5SDimitry Andric 
9640b57cec5SDimitry Andric // --- Deallocate my ancestor task ---
9650b57cec5SDimitry Andric __kmp_free_task(gtid, taskdata, thread);
9660b57cec5SDimitry Andric 
9670b57cec5SDimitry Andric taskdata = parent_taskdata;
9680b57cec5SDimitry Andric 
9690b57cec5SDimitry Andric if (team_serial)
9700b57cec5SDimitry Andric return;
9710b57cec5SDimitry Andric // Stop checking ancestors at implicit task instead of walking up ancestor
9720b57cec5SDimitry Andric // tree to avoid premature deallocation of ancestors.
9730b57cec5SDimitry Andric if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
9740b57cec5SDimitry Andric if (taskdata->td_dephash) { // do we need to cleanup dephash?
9750b57cec5SDimitry Andric int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
9760b57cec5SDimitry Andric kmp_tasking_flags_t flags_old = taskdata->td_flags;
9770b57cec5SDimitry Andric if (children == 0 && flags_old.complete == 1) {
9780b57cec5SDimitry Andric kmp_tasking_flags_t flags_new = flags_old;
9790b57cec5SDimitry Andric flags_new.complete = 0;
9800b57cec5SDimitry Andric if (KMP_COMPARE_AND_STORE_ACQ32(
9810b57cec5SDimitry Andric RCAST(kmp_int32 *, &taskdata->td_flags),
9820b57cec5SDimitry Andric *RCAST(kmp_int32 *, &flags_old),
9830b57cec5SDimitry Andric *RCAST(kmp_int32 *, &flags_new))) {
9840b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
9850b57cec5SDimitry Andric "dephash of implicit task %p\n",
9860b57cec5SDimitry Andric gtid, taskdata));
9870b57cec5SDimitry Andric // cleanup dephash of finished implicit task
9880b57cec5SDimitry Andric __kmp_dephash_free_entries(thread, taskdata->td_dephash);
9890b57cec5SDimitry Andric }
9900b57cec5SDimitry Andric }
9910b57cec5SDimitry Andric }
9920b57cec5SDimitry Andric return;
9930b57cec5SDimitry Andric }
9940b57cec5SDimitry Andric // Predecrement simulated by "- 1" calculation
9950b57cec5SDimitry Andric children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
9960b57cec5SDimitry Andric KMP_DEBUG_ASSERT(children >= 0);
9970b57cec5SDimitry Andric }
9980b57cec5SDimitry Andric 
9990b57cec5SDimitry Andric KA_TRACE(
10000b57cec5SDimitry Andric 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
10010b57cec5SDimitry Andric "not freeing it yet\n",
10020b57cec5SDimitry Andric gtid, taskdata, children));
10030b57cec5SDimitry Andric }
10040b57cec5SDimitry Andric 
1005349cc55cSDimitry Andric // Only need to keep track of child task counts if any of the following:
1006349cc55cSDimitry Andric // 1. the team is parallel and tasking is not serialized;
1007349cc55cSDimitry Andric // 2. it is a proxy, detachable, or hidden helper task;
1008349cc55cSDimitry Andric // 3. the children counter of its parent task is greater than 0.
1009349cc55cSDimitry Andric // The reason for the 3rd one is a serialized team that has encountered a
1010349cc55cSDimitry Andric // deferred (e.g., detached or hidden helper) task T. The execution of T is
1011349cc55cSDimitry Andric // still deferred, and a regular task may also depend on T. In this case, if
1012349cc55cSDimitry Andric // we don't track the children, task synchronization will be broken.
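// To illustrate the 3rd condition, a schematic user-code scenario (sketch
// only, not from this file; variable names invented):
//
//   #pragma omp parallel if(0)                      // serialized team
//   {
//     omp_event_handle_t ev;
//     #pragma omp task detach(ev) depend(out : x)   // deferred detached task T
//     { /* ... */ }
//     #pragma omp task depend(in : x)               // regular task depending on T
//     { /* ... */ }
//     #pragma omp taskwait                          // must see T as incomplete
//   }
//
// Without the child counters the taskwait could not observe that T is still
// pending, and synchronization would be broken.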
1013349cc55cSDimitry Andric static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
1014349cc55cSDimitry Andric kmp_tasking_flags_t flags = taskdata->td_flags;
1015349cc55cSDimitry Andric bool ret = !(flags.team_serial || flags.tasking_ser);
1016349cc55cSDimitry Andric ret = ret || flags.proxy == TASK_PROXY ||
1017349cc55cSDimitry Andric flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
1018349cc55cSDimitry Andric ret = ret ||
1019349cc55cSDimitry Andric KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
102006c3fb27SDimitry Andric #if OMPX_TASKGRAPH
102106c3fb27SDimitry Andric if (taskdata->td_taskgroup && taskdata->is_taskgraph)
102206c3fb27SDimitry Andric ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
102306c3fb27SDimitry Andric #endif
1024349cc55cSDimitry Andric return ret;
1025349cc55cSDimitry Andric }
1026349cc55cSDimitry Andric 
10270b57cec5SDimitry Andric // __kmp_task_finish: bookkeeping to do when a task finishes execution
10280b57cec5SDimitry Andric //
10290b57cec5SDimitry Andric // gtid: global thread ID for calling thread
10300b57cec5SDimitry Andric // task: task to be finished
10310b57cec5SDimitry Andric // resumed_task: task to be resumed. (may be NULL if task is serialized)
10325ffd83dbSDimitry Andric //
10335ffd83dbSDimitry Andric // template<ompt>: effectively ompt_enabled.enabled != 0
10345ffd83dbSDimitry Andric // The version with ompt=false is inlined, allowing all OMPT code to be
10355ffd83dbSDimitry Andric // optimized away in this case.
10360b57cec5SDimitry Andric template <bool ompt>
10370b57cec5SDimitry Andric static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
10380b57cec5SDimitry Andric kmp_taskdata_t *resumed_task) {
10390b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
10400b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid];
10410b57cec5SDimitry Andric kmp_task_team_t *task_team =
10420b57cec5SDimitry Andric thread->th.th_task_team; // might be NULL for serial teams...
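// Overview of the bookkeeping below: pop the tied-task stack (if built),
// handle the untied reference count, run the compiler-generated destructor
// thunk if present, try to detach an unfulfilled detachable task, re-enqueue
// tasks with a pending target async handle, and only for tasks that actually
// completed: mark completion, release dependences, and free the task and any
// eligible ancestors.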
104306c3fb27SDimitry Andric #if OMPX_TASKGRAPH
104406c3fb27SDimitry Andric // cached to avoid a seg fault when accessing taskdata->td_flags after the taskdata has been freed (vanilla taskloop case)
104506c3fb27SDimitry Andric bool is_taskgraph;
104606c3fb27SDimitry Andric #endif
1047349cc55cSDimitry Andric #if KMP_DEBUG
10480b57cec5SDimitry Andric kmp_int32 children = 0;
1049349cc55cSDimitry Andric #endif
10500b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
10510b57cec5SDimitry Andric "task %p\n",
10520b57cec5SDimitry Andric gtid, taskdata, resumed_task));
10530b57cec5SDimitry Andric 
10540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
10550b57cec5SDimitry Andric 
105606c3fb27SDimitry Andric #if OMPX_TASKGRAPH
105706c3fb27SDimitry Andric is_taskgraph = taskdata->is_taskgraph;
105806c3fb27SDimitry Andric #endif
105906c3fb27SDimitry Andric 
10600b57cec5SDimitry Andric // Pop task from stack if tied
10610b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
10620b57cec5SDimitry Andric if (taskdata->td_flags.tiedness == TASK_TIED) {
10630b57cec5SDimitry Andric __kmp_pop_task_stack(gtid, thread, taskdata);
10640b57cec5SDimitry Andric }
10650b57cec5SDimitry Andric #endif /* BUILD_TIED_TASK_STACK */
10660b57cec5SDimitry Andric 
1067e8d8bef9SDimitry Andric if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
10680b57cec5SDimitry Andric // untied task needs to check the counter so that the task structure is not
10690b57cec5SDimitry Andric // freed prematurely
10700b57cec5SDimitry Andric kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
10710b57cec5SDimitry Andric KA_TRACE(
10720b57cec5SDimitry Andric 20,
10730b57cec5SDimitry Andric ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
10740b57cec5SDimitry Andric gtid, counter, taskdata));
10750b57cec5SDimitry Andric if (counter > 0) {
10760b57cec5SDimitry Andric // untied task is not done, to be continued possibly by other thread, do
10770b57cec5SDimitry Andric // not free it now
10780b57cec5SDimitry Andric if (resumed_task == NULL) {
10790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
10800b57cec5SDimitry Andric resumed_task = taskdata->td_parent; // In a serialized task, the resumed
10810b57cec5SDimitry Andric // task is the parent
10820b57cec5SDimitry Andric }
10830b57cec5SDimitry Andric thread->th.th_current_task = resumed_task; // restore current_task
10840b57cec5SDimitry Andric resumed_task->td_flags.executing = 1; // resume previous task
10850b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
10860b57cec5SDimitry Andric "resuming task %p\n",
10870b57cec5SDimitry Andric gtid, taskdata, resumed_task));
10880b57cec5SDimitry Andric return;
10890b57cec5SDimitry Andric }
10900b57cec5SDimitry Andric }
10910b57cec5SDimitry Andric 
10920b57cec5SDimitry Andric // bookkeeping for resuming task:
10930b57cec5SDimitry Andric // GEH - note tasking_ser => task_serial
10940b57cec5SDimitry Andric KMP_DEBUG_ASSERT(
10950b57cec5SDimitry Andric (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
10960b57cec5SDimitry Andric taskdata->td_flags.task_serial);
10970b57cec5SDimitry Andric if (taskdata->td_flags.task_serial) {
10980b57cec5SDimitry Andric if (resumed_task == NULL) {
10990b57cec5SDimitry Andric resumed_task = taskdata->td_parent; // In a serialized task, the resumed
11000b57cec5SDimitry Andric // task is the parent
11010b57cec5SDimitry Andric }
11020b57cec5SDimitry Andric } else {
11030b57cec5SDimitry Andric KMP_DEBUG_ASSERT(resumed_task !=
1104480093f4SDimitry Andric NULL); // verify that resumed task is passed as argument
11050b57cec5SDimitry Andric }
11060b57cec5SDimitry Andric 
11075ffd83dbSDimitry Andric /* If the task's destructor thunk flag has been set, we need to invoke the
11085ffd83dbSDimitry Andric destructor thunk that has been generated by the compiler. The code is
11095ffd83dbSDimitry Andric placed here, since at this point other tasks might have been released
11105ffd83dbSDimitry Andric hence overlapping the destructor invocations with some other work in the
11115ffd83dbSDimitry Andric released tasks. The OpenMP spec is not specific on when the destructors
11125ffd83dbSDimitry Andric are invoked, so we should be free to choose. */
1113fe6060f1SDimitry Andric if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
11145ffd83dbSDimitry Andric kmp_routine_entry_t destr_thunk = task->data1.destructors;
11155ffd83dbSDimitry Andric KMP_ASSERT(destr_thunk);
11165ffd83dbSDimitry Andric destr_thunk(gtid, task);
11175ffd83dbSDimitry Andric }
11185ffd83dbSDimitry Andric 
11195ffd83dbSDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
11205ffd83dbSDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
11215ffd83dbSDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
11225ffd83dbSDimitry Andric 
1123bdd1243dSDimitry Andric bool completed = true;
1124fe6060f1SDimitry Andric if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
11255ffd83dbSDimitry Andric if (taskdata->td_allow_completion_event.type ==
11265ffd83dbSDimitry Andric KMP_EVENT_ALLOW_COMPLETION) {
11275ffd83dbSDimitry Andric // event hasn't been fulfilled yet. Try to detach task.
11285ffd83dbSDimitry Andric __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
11295ffd83dbSDimitry Andric if (taskdata->td_allow_completion_event.type ==
11305ffd83dbSDimitry Andric KMP_EVENT_ALLOW_COMPLETION) {
11315ffd83dbSDimitry Andric // task finished execution
11325ffd83dbSDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
11335ffd83dbSDimitry Andric taskdata->td_flags.executing = 0; // suspend the finishing task
11345ffd83dbSDimitry Andric 
11355ffd83dbSDimitry Andric #if OMPT_SUPPORT
11365ffd83dbSDimitry Andric // For a detached task that is not yet completed, we switch back to the
11375ffd83dbSDimitry Andric // resumed task; omp_fulfill_event will signal completion later. Locking
11385ffd83dbSDimitry Andric // is necessary to avoid a race with ompt_task_late_fulfill.
11395ffd83dbSDimitry Andric if (ompt)
11405ffd83dbSDimitry Andric __ompt_task_finish(task, resumed_task, ompt_task_detach);
11415ffd83dbSDimitry Andric #endif
11425ffd83dbSDimitry Andric 
11435ffd83dbSDimitry Andric // no access to taskdata after this point!
11445ffd83dbSDimitry Andric // __kmp_fulfill_event might free taskdata at any time from now
11455ffd83dbSDimitry Andric 
11465ffd83dbSDimitry Andric taskdata->td_flags.proxy = TASK_PROXY; // proxify!
1147bdd1243dSDimitry Andric completed = false;
11485ffd83dbSDimitry Andric }
11495ffd83dbSDimitry Andric __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
11505ffd83dbSDimitry Andric }
11515ffd83dbSDimitry Andric }
11525ffd83dbSDimitry Andric 
1153bdd1243dSDimitry Andric // Tasks with valid target async handles must be re-enqueued.
1154bdd1243dSDimitry Andric if (taskdata->td_target_data.async_handle != NULL) {
1155bdd1243dSDimitry Andric // Note: no need to translate gtid to its shadow.
If the current thread is a
1156bdd1243dSDimitry Andric // hidden helper one, then the gtid is already correct. Otherwise, hidden
1157bdd1243dSDimitry Andric // helper threads are disabled, and gtid refers to an OpenMP thread.
1158*0fca6ea1SDimitry Andric #if OMPT_SUPPORT
1159*0fca6ea1SDimitry Andric if (ompt) {
1160*0fca6ea1SDimitry Andric __ompt_task_finish(task, resumed_task, ompt_task_switch);
1161*0fca6ea1SDimitry Andric }
1162*0fca6ea1SDimitry Andric #endif
1163bdd1243dSDimitry Andric __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
1164bdd1243dSDimitry Andric if (KMP_HIDDEN_HELPER_THREAD(gtid))
1165bdd1243dSDimitry Andric __kmp_hidden_helper_worker_thread_signal();
1166bdd1243dSDimitry Andric completed = false;
1167bdd1243dSDimitry Andric }
1168bdd1243dSDimitry Andric 
1169bdd1243dSDimitry Andric if (completed) {
11705ffd83dbSDimitry Andric taskdata->td_flags.complete = 1; // mark the task as completed
117106c3fb27SDimitry Andric #if OMPX_TASKGRAPH
117206c3fb27SDimitry Andric taskdata->td_flags.onced = 1; // mark the task as already run once
117306c3fb27SDimitry Andric #endif
11745ffd83dbSDimitry Andric 
11755ffd83dbSDimitry Andric #if OMPT_SUPPORT
11765ffd83dbSDimitry Andric // This is not a detached task; we are done here
11775ffd83dbSDimitry Andric if (ompt)
11785ffd83dbSDimitry Andric __ompt_task_finish(task, resumed_task, ompt_task_complete);
11795ffd83dbSDimitry Andric #endif
1180349cc55cSDimitry Andric // TODO: What would be the balance between the conditions in the function
1181349cc55cSDimitry Andric // and an atomic operation?
1182349cc55cSDimitry Andric if (__kmp_track_children_task(taskdata)) {
1183fe6060f1SDimitry Andric __kmp_release_deps(gtid, taskdata);
11845ffd83dbSDimitry Andric // Predecrement simulated by "- 1" calculation
1185349cc55cSDimitry Andric #if KMP_DEBUG
1186349cc55cSDimitry Andric children = -1 +
1187349cc55cSDimitry Andric #endif
1188349cc55cSDimitry Andric KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
11895ffd83dbSDimitry Andric KMP_DEBUG_ASSERT(children >= 0);
119006c3fb27SDimitry Andric #if OMPX_TASKGRAPH
119106c3fb27SDimitry Andric if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
119206c3fb27SDimitry Andric #else
11935ffd83dbSDimitry Andric if (taskdata->td_taskgroup)
119406c3fb27SDimitry Andric #endif
11955ffd83dbSDimitry Andric KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
1196fe6060f1SDimitry Andric } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
1197fe6060f1SDimitry Andric task_team->tt.tt_hidden_helper_task_encountered)) {
1198fe6060f1SDimitry Andric // if we found proxy or hidden helper tasks there could exist a dependency
1199fe6060f1SDimitry Andric // chain with the proxy task as origin
12005ffd83dbSDimitry Andric __kmp_release_deps(gtid, taskdata);
12015ffd83dbSDimitry Andric }
12025ffd83dbSDimitry Andric // td_flags.executing must be marked as 0 after __kmp_release_deps has been
12035ffd83dbSDimitry Andric // called. Otherwise, if a task is executed immediately from the
12045ffd83dbSDimitry Andric // release_deps code, the flag will be reset to 1 again by this same
12055ffd83dbSDimitry Andric // function
12065ffd83dbSDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
12075ffd83dbSDimitry Andric taskdata->td_flags.executing = 0; // suspend the finishing task
1208bdd1243dSDimitry Andric 
1209bdd1243dSDimitry Andric // Decrement the counter of hidden helper tasks to be executed.
1210bdd1243dSDimitry Andric if (taskdata->td_flags.hidden_helper) { 1211bdd1243dSDimitry Andric // Hidden helper tasks can only be executed by hidden helper threads. 1212bdd1243dSDimitry Andric KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid)); 1213bdd1243dSDimitry Andric KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks); 1214bdd1243dSDimitry Andric } 12155ffd83dbSDimitry Andric } 12165ffd83dbSDimitry Andric 12175ffd83dbSDimitry Andric KA_TRACE( 12185ffd83dbSDimitry Andric 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", 12195ffd83dbSDimitry Andric gtid, taskdata, children)); 12205ffd83dbSDimitry Andric 12210b57cec5SDimitry Andric // Free this task and then ancestor tasks if they have no children. 12220b57cec5SDimitry Andric // Restore th_current_task first as suggested by John: 12230b57cec5SDimitry Andric // johnmc: if an asynchronous inquiry peers into the runtime system 12240b57cec5SDimitry Andric // it doesn't see the freed task as the current task. 12250b57cec5SDimitry Andric thread->th.th_current_task = resumed_task; 1226bdd1243dSDimitry Andric if (completed) 12270b57cec5SDimitry Andric __kmp_free_task_and_ancestors(gtid, taskdata, thread); 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric // TODO: GEH - make sure root team implicit task is initialized properly. 12300b57cec5SDimitry Andric // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); 12310b57cec5SDimitry Andric resumed_task->td_flags.executing = 1; // resume previous task 12320b57cec5SDimitry Andric 123306c3fb27SDimitry Andric #if OMPX_TASKGRAPH 123406c3fb27SDimitry Andric if (is_taskgraph && __kmp_track_children_task(taskdata) && 123506c3fb27SDimitry Andric taskdata->td_taskgroup) { 123606c3fb27SDimitry Andric // TDG: we only release taskgroup barrier here because 123706c3fb27SDimitry Andric // free_task_and_ancestors will call 123806c3fb27SDimitry Andric // __kmp_free_task, which resets all task parameters such as 123906c3fb27SDimitry Andric // taskdata->started, etc. If we release the barrier earlier, these 124006c3fb27SDimitry Andric // parameters could be read before being reset. 
This is not an issue for 124106c3fb27SDimitry Andric // non-TDG implementation because we never reuse a task(data) structure 124206c3fb27SDimitry Andric KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count); 124306c3fb27SDimitry Andric } 124406c3fb27SDimitry Andric #endif 124506c3fb27SDimitry Andric 12460b57cec5SDimitry Andric KA_TRACE( 12470b57cec5SDimitry Andric 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", 12480b57cec5SDimitry Andric gtid, taskdata, resumed_task)); 12490b57cec5SDimitry Andric 12500b57cec5SDimitry Andric return; 12510b57cec5SDimitry Andric } 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric template <bool ompt> 12540b57cec5SDimitry Andric static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, 12550b57cec5SDimitry Andric kmp_int32 gtid, 12560b57cec5SDimitry Andric kmp_task_t *task) { 12570b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", 12580b57cec5SDimitry Andric gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); 1259fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 12600b57cec5SDimitry Andric // this routine will provide task to resume 12610b57cec5SDimitry Andric __kmp_task_finish<ompt>(gtid, task, NULL); 12620b57cec5SDimitry Andric 12630b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", 12640b57cec5SDimitry Andric gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric #if OMPT_SUPPORT 12670b57cec5SDimitry Andric if (ompt) { 12680b57cec5SDimitry Andric ompt_frame_t *ompt_frame; 12690b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 12700b57cec5SDimitry Andric ompt_frame->enter_frame = ompt_data_none; 1271fe6060f1SDimitry Andric ompt_frame->enter_frame_flags = 1272fe6060f1SDimitry Andric ompt_frame_runtime | ompt_frame_framepointer; 12730b57cec5SDimitry Andric } 12740b57cec5SDimitry Andric #endif 12750b57cec5SDimitry Andric 12760b57cec5SDimitry Andric return; 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric #if OMPT_SUPPORT 12800b57cec5SDimitry Andric OMPT_NOINLINE 12810b57cec5SDimitry Andric void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, 12820b57cec5SDimitry Andric kmp_task_t *task) { 12830b57cec5SDimitry Andric __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task); 12840b57cec5SDimitry Andric } 12850b57cec5SDimitry Andric #endif // OMPT_SUPPORT 12860b57cec5SDimitry Andric 12870b57cec5SDimitry Andric // __kmpc_omp_task_complete_if0: report that a task has completed execution 12880b57cec5SDimitry Andric // 12890b57cec5SDimitry Andric // loc_ref: source location information; points to end of task block. 12900b57cec5SDimitry Andric // gtid: global thread number. 12910b57cec5SDimitry Andric // task: task thunk for the completed task. 
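// For a serialized ("if(0)") task the compiler brackets the inlined task body
// with the begin_if0/complete_if0 pair; schematically (sketch only, the exact
// generated code may differ):
//
//   kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, flags, sizeof_task,
//                                         sizeof_shareds, entry);
//   __kmpc_omp_task_begin_if0(loc, gtid, t);
//   /* ... task body executed inline by the encountering thread ... */
//   __kmpc_omp_task_complete_if0(loc, gtid, t);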
12920b57cec5SDimitry Andric void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, 12930b57cec5SDimitry Andric kmp_task_t *task) { 12940b57cec5SDimitry Andric #if OMPT_SUPPORT 12950b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 12960b57cec5SDimitry Andric __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task); 12970b57cec5SDimitry Andric return; 12980b57cec5SDimitry Andric } 12990b57cec5SDimitry Andric #endif 13000b57cec5SDimitry Andric __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task); 13010b57cec5SDimitry Andric } 13020b57cec5SDimitry Andric 13030b57cec5SDimitry Andric #ifdef TASK_UNUSED 13040b57cec5SDimitry Andric // __kmpc_omp_task_complete: report that a task has completed execution 13050b57cec5SDimitry Andric // NEVER GENERATED BY COMPILER, DEPRECATED!!! 13060b57cec5SDimitry Andric void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, 13070b57cec5SDimitry Andric kmp_task_t *task) { 13080b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid, 13090b57cec5SDimitry Andric loc_ref, KMP_TASK_TO_TASKDATA(task))); 13100b57cec5SDimitry Andric 13110b57cec5SDimitry Andric __kmp_task_finish<false>(gtid, task, 13120b57cec5SDimitry Andric NULL); // Not sure how to find task to resume 13130b57cec5SDimitry Andric 13140b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid, 13150b57cec5SDimitry Andric loc_ref, KMP_TASK_TO_TASKDATA(task))); 13160b57cec5SDimitry Andric return; 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric #endif // TASK_UNUSED 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit 13210b57cec5SDimitry Andric // task for a given thread 13220b57cec5SDimitry Andric // 13230b57cec5SDimitry Andric // loc_ref: reference to source location of parallel region 13240b57cec5SDimitry Andric // this_thr: thread data structure corresponding to implicit task 13250b57cec5SDimitry Andric // team: team for this_thr 13260b57cec5SDimitry Andric // tid: thread id of given thread within team 13270b57cec5SDimitry Andric // set_curr_task: TRUE if need to push current task to thread 13280b57cec5SDimitry Andric // NOTE: Routine does not set up the implicit task ICVS. This is assumed to 13290b57cec5SDimitry Andric // have already been done elsewhere. 13300b57cec5SDimitry Andric // TODO: Get better loc_ref. Value passed in may be NULL 13310b57cec5SDimitry Andric void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, 13320b57cec5SDimitry Andric kmp_team_t *team, int tid, int set_curr_task) { 13330b57cec5SDimitry Andric kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid]; 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric KF_TRACE( 13360b57cec5SDimitry Andric 10, 13370b57cec5SDimitry Andric ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", 13380b57cec5SDimitry Andric tid, team, task, set_curr_task ? 
"TRUE" : "FALSE")); 13390b57cec5SDimitry Andric 13400b57cec5SDimitry Andric task->td_task_id = KMP_GEN_TASK_ID(); 13410b57cec5SDimitry Andric task->td_team = team; 13420b57cec5SDimitry Andric // task->td_parent = NULL; // fix for CQ230101 (broken parent task info 13430b57cec5SDimitry Andric // in debugger) 13440b57cec5SDimitry Andric task->td_ident = loc_ref; 13450b57cec5SDimitry Andric task->td_taskwait_ident = NULL; 13460b57cec5SDimitry Andric task->td_taskwait_counter = 0; 13470b57cec5SDimitry Andric task->td_taskwait_thread = 0; 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric task->td_flags.tiedness = TASK_TIED; 13500b57cec5SDimitry Andric task->td_flags.tasktype = TASK_IMPLICIT; 13510b57cec5SDimitry Andric task->td_flags.proxy = TASK_FULL; 13520b57cec5SDimitry Andric 13530b57cec5SDimitry Andric // All implicit tasks are executed immediately, not deferred 13540b57cec5SDimitry Andric task->td_flags.task_serial = 1; 13550b57cec5SDimitry Andric task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); 13560b57cec5SDimitry Andric task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; 13570b57cec5SDimitry Andric 13580b57cec5SDimitry Andric task->td_flags.started = 1; 13590b57cec5SDimitry Andric task->td_flags.executing = 1; 13600b57cec5SDimitry Andric task->td_flags.complete = 0; 13610b57cec5SDimitry Andric task->td_flags.freed = 0; 136206c3fb27SDimitry Andric #if OMPX_TASKGRAPH 136306c3fb27SDimitry Andric task->td_flags.onced = 0; 136406c3fb27SDimitry Andric #endif 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric task->td_depnode = NULL; 13670b57cec5SDimitry Andric task->td_last_tied = task; 13680b57cec5SDimitry Andric task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED; 13690b57cec5SDimitry Andric 13700b57cec5SDimitry Andric if (set_curr_task) { // only do this init first time thread is created 13710b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0); 13720b57cec5SDimitry Andric // Not used: don't need to deallocate implicit task 13730b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0); 13740b57cec5SDimitry Andric task->td_taskgroup = NULL; // An implicit task does not have taskgroup 13750b57cec5SDimitry Andric task->td_dephash = NULL; 13760b57cec5SDimitry Andric __kmp_push_current_task_to_thread(this_thr, team, tid); 13770b57cec5SDimitry Andric } else { 13780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); 13790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); 13800b57cec5SDimitry Andric } 13810b57cec5SDimitry Andric 13820b57cec5SDimitry Andric #if OMPT_SUPPORT 13830b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) 13840b57cec5SDimitry Andric __ompt_task_init(task, tid); 13850b57cec5SDimitry Andric #endif 13860b57cec5SDimitry Andric 13870b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, 13880b57cec5SDimitry Andric team, task)); 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13910b57cec5SDimitry Andric // __kmp_finish_implicit_task: Release resources associated to implicit tasks 13920b57cec5SDimitry Andric // at the end of parallel regions. Some resources are kept for reuse in the next 13930b57cec5SDimitry Andric // parallel region. 
13940b57cec5SDimitry Andric // 13950b57cec5SDimitry Andric // thread: thread data structure corresponding to implicit task 13960b57cec5SDimitry Andric void __kmp_finish_implicit_task(kmp_info_t *thread) { 13970b57cec5SDimitry Andric kmp_taskdata_t *task = thread->th.th_current_task; 13980b57cec5SDimitry Andric if (task->td_dephash) { 13990b57cec5SDimitry Andric int children; 14000b57cec5SDimitry Andric task->td_flags.complete = 1; 140106c3fb27SDimitry Andric #if OMPX_TASKGRAPH 140206c3fb27SDimitry Andric task->td_flags.onced = 1; 140306c3fb27SDimitry Andric #endif 14040b57cec5SDimitry Andric children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks); 14050b57cec5SDimitry Andric kmp_tasking_flags_t flags_old = task->td_flags; 14060b57cec5SDimitry Andric if (children == 0 && flags_old.complete == 1) { 14070b57cec5SDimitry Andric kmp_tasking_flags_t flags_new = flags_old; 14080b57cec5SDimitry Andric flags_new.complete = 0; 14090b57cec5SDimitry Andric if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags), 14100b57cec5SDimitry Andric *RCAST(kmp_int32 *, &flags_old), 14110b57cec5SDimitry Andric *RCAST(kmp_int32 *, &flags_new))) { 14120b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans " 14130b57cec5SDimitry Andric "dephash of implicit task %p\n", 14140b57cec5SDimitry Andric thread->th.th_info.ds.ds_gtid, task)); 14150b57cec5SDimitry Andric __kmp_dephash_free_entries(thread, task->td_dephash); 14160b57cec5SDimitry Andric } 14170b57cec5SDimitry Andric } 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric } 14200b57cec5SDimitry Andric 14210b57cec5SDimitry Andric // __kmp_free_implicit_task: Release resources associated to implicit tasks 14220b57cec5SDimitry Andric // when these are destroyed regions 14230b57cec5SDimitry Andric // 14240b57cec5SDimitry Andric // thread: thread data structure corresponding to implicit task 14250b57cec5SDimitry Andric void __kmp_free_implicit_task(kmp_info_t *thread) { 14260b57cec5SDimitry Andric kmp_taskdata_t *task = thread->th.th_current_task; 14270b57cec5SDimitry Andric if (task && task->td_dephash) { 14280b57cec5SDimitry Andric __kmp_dephash_free(thread, task->td_dephash); 14290b57cec5SDimitry Andric task->td_dephash = NULL; 14300b57cec5SDimitry Andric } 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric // Round up a size to a power of two specified by val: Used to insert padding 14340b57cec5SDimitry Andric // between structures co-allocated using a single malloc() call 14350b57cec5SDimitry Andric static size_t __kmp_round_up_to_val(size_t size, size_t val) { 14360b57cec5SDimitry Andric if (size & (val - 1)) { 14370b57cec5SDimitry Andric size &= ~(val - 1); 14380b57cec5SDimitry Andric if (size <= KMP_SIZE_T_MAX - val) { 14390b57cec5SDimitry Andric size += val; // Round up if there is no overflow. 14400b57cec5SDimitry Andric } 14410b57cec5SDimitry Andric } 14420b57cec5SDimitry Andric return size; 14430b57cec5SDimitry Andric } // __kmp_round_up_to_va 14440b57cec5SDimitry Andric 14450b57cec5SDimitry Andric // __kmp_task_alloc: Allocate the taskdata and task data structures for a task 14460b57cec5SDimitry Andric // 14470b57cec5SDimitry Andric // loc_ref: source location information 14480b57cec5SDimitry Andric // gtid: global thread number. 14490b57cec5SDimitry Andric // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' 14500b57cec5SDimitry Andric // task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine. 
14510b57cec5SDimitry Andric // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including 14520b57cec5SDimitry Andric // private vars accessed in task. 14530b57cec5SDimitry Andric // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed 14540b57cec5SDimitry Andric // in task. 14550b57cec5SDimitry Andric // task_entry: Pointer to task code entry point generated by compiler. 14560b57cec5SDimitry Andric // returns: a pointer to the allocated kmp_task_t structure (task). 14570b57cec5SDimitry Andric kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 14580b57cec5SDimitry Andric kmp_tasking_flags_t *flags, 14590b57cec5SDimitry Andric size_t sizeof_kmp_task_t, size_t sizeof_shareds, 14600b57cec5SDimitry Andric kmp_routine_entry_t task_entry) { 14610b57cec5SDimitry Andric kmp_task_t *task; 14620b57cec5SDimitry Andric kmp_taskdata_t *taskdata; 14630b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 14640b57cec5SDimitry Andric kmp_team_t *team = thread->th.th_team; 14650b57cec5SDimitry Andric kmp_taskdata_t *parent_task = thread->th.th_current_task; 14660b57cec5SDimitry Andric size_t shareds_offset; 14670b57cec5SDimitry Andric 1468e8d8bef9SDimitry Andric if (UNLIKELY(!TCR_4(__kmp_init_middle))) 14690b57cec5SDimitry Andric __kmp_middle_initialize(); 14700b57cec5SDimitry Andric 1471e8d8bef9SDimitry Andric if (flags->hidden_helper) { 1472e8d8bef9SDimitry Andric if (__kmp_enable_hidden_helper) { 1473e8d8bef9SDimitry Andric if (!TCR_4(__kmp_init_hidden_helper)) 1474e8d8bef9SDimitry Andric __kmp_hidden_helper_initialize(); 1475e8d8bef9SDimitry Andric } else { 1476e8d8bef9SDimitry Andric // If the hidden helper task is not enabled, reset the flag to FALSE. 1477e8d8bef9SDimitry Andric flags->hidden_helper = FALSE; 1478e8d8bef9SDimitry Andric } 1479e8d8bef9SDimitry Andric } 1480e8d8bef9SDimitry Andric 14810b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " 14820b57cec5SDimitry Andric "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", 14830b57cec5SDimitry Andric gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, 14840b57cec5SDimitry Andric sizeof_shareds, task_entry)); 14850b57cec5SDimitry Andric 1486fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_task); 14870b57cec5SDimitry Andric if (parent_task->td_flags.final) { 14880b57cec5SDimitry Andric if (flags->merged_if0) { 14890b57cec5SDimitry Andric } 14900b57cec5SDimitry Andric flags->final = 1; 14910b57cec5SDimitry Andric } 1492e8d8bef9SDimitry Andric 14930b57cec5SDimitry Andric if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) { 14940b57cec5SDimitry Andric // Untied task encountered causes the TSC algorithm to check entire deque of 14950b57cec5SDimitry Andric // the victim thread. If no untied task encountered, then checking the head 14960b57cec5SDimitry Andric // of the deque should be enough. 1497349cc55cSDimitry Andric KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1); 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric // Detachable tasks are not proxy tasks yet but could be in the future. Doing 15010b57cec5SDimitry Andric // the tasking setup 15020b57cec5SDimitry Andric // when that happens is too late. 
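// Common theme of the three task kinds handled below: each may execute or
// complete asynchronously (proxy bottom halves, omp_fulfill_event, the hidden
// helper team), so the task team, deques, and tasking support must be set up
// eagerly here rather than lazily on first push.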
1503fe6060f1SDimitry Andric if (UNLIKELY(flags->proxy == TASK_PROXY ||
1504fe6060f1SDimitry Andric flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
15050b57cec5SDimitry Andric if (flags->proxy == TASK_PROXY) {
15060b57cec5SDimitry Andric flags->tiedness = TASK_UNTIED;
15070b57cec5SDimitry Andric flags->merged_if0 = 1;
15080b57cec5SDimitry Andric }
15090b57cec5SDimitry Andric /* are we running in a serialized parallel region or tskm_immediate_exec... we
15100b57cec5SDimitry Andric need tasking support enabled */
1511349cc55cSDimitry Andric if ((thread->th.th_task_team) == NULL) {
15120b57cec5SDimitry Andric /* This should only happen if the team is serialized;
15130b57cec5SDimitry Andric set up a task team and propagate it to the thread */
15140b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_serialized);
15150b57cec5SDimitry Andric KA_TRACE(30,
15160b57cec5SDimitry Andric ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
15170b57cec5SDimitry Andric gtid));
1518*0fca6ea1SDimitry Andric __kmp_task_team_setup(thread, team);
1519349cc55cSDimitry Andric thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
15200b57cec5SDimitry Andric }
1521349cc55cSDimitry Andric kmp_task_team_t *task_team = thread->th.th_task_team;
15220b57cec5SDimitry Andric 
15230b57cec5SDimitry Andric /* tasking must be enabled now as the task might not be pushed */
15240b57cec5SDimitry Andric if (!KMP_TASKING_ENABLED(task_team)) {
15250b57cec5SDimitry Andric KA_TRACE(
15260b57cec5SDimitry Andric 30,
15270b57cec5SDimitry Andric ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1528349cc55cSDimitry Andric __kmp_enable_tasking(task_team, thread);
1529349cc55cSDimitry Andric kmp_int32 tid = thread->th.th_info.ds.ds_tid;
15300b57cec5SDimitry Andric kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
15310b57cec5SDimitry Andric // No lock needed since only owner can allocate
15320b57cec5SDimitry Andric if (thread_data->td.td_deque == NULL) {
1533349cc55cSDimitry Andric __kmp_alloc_task_deque(thread, thread_data);
15340b57cec5SDimitry Andric }
15350b57cec5SDimitry Andric }
15360b57cec5SDimitry Andric 
1537fe6060f1SDimitry Andric if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
1538e8d8bef9SDimitry Andric task_team->tt.tt_found_proxy_tasks == FALSE)
15390b57cec5SDimitry Andric TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
1540e8d8bef9SDimitry Andric if (flags->hidden_helper &&
1541e8d8bef9SDimitry Andric task_team->tt.tt_hidden_helper_task_encountered == FALSE)
1542e8d8bef9SDimitry Andric TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
15430b57cec5SDimitry Andric }
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric // Calculate shared structure offset including padding after kmp_task_t struct
15460b57cec5SDimitry Andric // to align pointers in shared struct
15470b57cec5SDimitry Andric shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
15480b57cec5SDimitry Andric shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
15490b57cec5SDimitry Andric 
15500b57cec5SDimitry Andric // Allocate a kmp_taskdata_t block and a kmp_task_t block.
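// Schematic of the single co-allocated block (illustration only):
//
//   taskdata                    task = KMP_TASKDATA_TO_TASK(taskdata)
//      |                           |
//      v                           v
//   [ kmp_taskdata_t ][ kmp_task_t + privates | pad ][ shareds ]
//   |<------------- shareds_offset ------------------>|
//
// The padding rounds shareds_offset up to sizeof(void *) so that the pointers
// in the shareds block are naturally aligned.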
15510b57cec5SDimitry Andric KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid, 15520b57cec5SDimitry Andric shareds_offset)); 15530b57cec5SDimitry Andric KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid, 15540b57cec5SDimitry Andric sizeof_shareds)); 15550b57cec5SDimitry Andric 15560b57cec5SDimitry Andric // Avoid double allocation here by combining shareds with taskdata 15570b57cec5SDimitry Andric #if USE_FAST_MEMORY 1558349cc55cSDimitry Andric taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset + 1559349cc55cSDimitry Andric sizeof_shareds); 15600b57cec5SDimitry Andric #else /* ! USE_FAST_MEMORY */ 1561349cc55cSDimitry Andric taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset + 1562349cc55cSDimitry Andric sizeof_shareds); 15630b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */ 15640b57cec5SDimitry Andric 15650b57cec5SDimitry Andric task = KMP_TASKDATA_TO_TASK(taskdata); 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric // Make sure task & taskdata are aligned appropriately 15685f757f3fSDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_S390X || !KMP_HAVE_QUAD 15690b57cec5SDimitry Andric KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0); 15700b57cec5SDimitry Andric KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0); 15710b57cec5SDimitry Andric #else 15720b57cec5SDimitry Andric KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0); 15730b57cec5SDimitry Andric KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0); 15740b57cec5SDimitry Andric #endif 15750b57cec5SDimitry Andric if (sizeof_shareds > 0) { 15760b57cec5SDimitry Andric // Avoid double allocation here by combining shareds with taskdata 15770b57cec5SDimitry Andric task->shareds = &((char *)taskdata)[shareds_offset]; 15780b57cec5SDimitry Andric // Make sure shareds struct is aligned to pointer size 15790b57cec5SDimitry Andric KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == 15800b57cec5SDimitry Andric 0); 15810b57cec5SDimitry Andric } else { 15820b57cec5SDimitry Andric task->shareds = NULL; 15830b57cec5SDimitry Andric } 15840b57cec5SDimitry Andric task->routine = task_entry; 15850b57cec5SDimitry Andric task->part_id = 0; // AC: Always start with 0 part id 15860b57cec5SDimitry Andric 15870b57cec5SDimitry Andric taskdata->td_task_id = KMP_GEN_TASK_ID(); 1588e8d8bef9SDimitry Andric taskdata->td_team = thread->th.th_team; 1589349cc55cSDimitry Andric taskdata->td_alloc_thread = thread; 15900b57cec5SDimitry Andric taskdata->td_parent = parent_task; 15910b57cec5SDimitry Andric taskdata->td_level = parent_task->td_level + 1; // increment nesting level 15920b57cec5SDimitry Andric KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0); 15930b57cec5SDimitry Andric taskdata->td_ident = loc_ref; 15940b57cec5SDimitry Andric taskdata->td_taskwait_ident = NULL; 15950b57cec5SDimitry Andric taskdata->td_taskwait_counter = 0; 15960b57cec5SDimitry Andric taskdata->td_taskwait_thread = 0; 15970b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_parent != NULL); 15980b57cec5SDimitry Andric // avoid copying icvs for proxy tasks 15990b57cec5SDimitry Andric if (flags->proxy == TASK_FULL) 16000b57cec5SDimitry Andric copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs); 16010b57cec5SDimitry Andric 1602fe6060f1SDimitry Andric taskdata->td_flags = *flags; 16030b57cec5SDimitry Andric taskdata->td_task_team = thread->th.th_task_team; 16040b57cec5SDimitry Andric 
taskdata->td_size_alloc = shareds_offset + sizeof_shareds; 16050b57cec5SDimitry Andric taskdata->td_flags.tasktype = TASK_EXPLICIT; 1606349cc55cSDimitry Andric // If it is hidden helper task, we need to set the team and task team 1607349cc55cSDimitry Andric // correspondingly. 1608349cc55cSDimitry Andric if (flags->hidden_helper) { 1609349cc55cSDimitry Andric kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)]; 1610349cc55cSDimitry Andric taskdata->td_team = shadow_thread->th.th_team; 1611349cc55cSDimitry Andric taskdata->td_task_team = shadow_thread->th.th_task_team; 1612349cc55cSDimitry Andric } 16130b57cec5SDimitry Andric 16140b57cec5SDimitry Andric // GEH - TODO: fix this to copy parent task's value of tasking_ser flag 16150b57cec5SDimitry Andric taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric // GEH - TODO: fix this to copy parent task's value of team_serial flag 16180b57cec5SDimitry Andric taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; 16190b57cec5SDimitry Andric 16200b57cec5SDimitry Andric // GEH - Note we serialize the task if the team is serialized to make sure 16210b57cec5SDimitry Andric // implicit parallel region tasks are not left until program termination to 16220b57cec5SDimitry Andric // execute. Also, it helps locality to execute immediately. 16230b57cec5SDimitry Andric 16240b57cec5SDimitry Andric taskdata->td_flags.task_serial = 16250b57cec5SDimitry Andric (parent_task->td_flags.final || taskdata->td_flags.team_serial || 16265ffd83dbSDimitry Andric taskdata->td_flags.tasking_ser || flags->merged_if0); 16270b57cec5SDimitry Andric 16280b57cec5SDimitry Andric taskdata->td_flags.started = 0; 16290b57cec5SDimitry Andric taskdata->td_flags.executing = 0; 16300b57cec5SDimitry Andric taskdata->td_flags.complete = 0; 16310b57cec5SDimitry Andric taskdata->td_flags.freed = 0; 163206c3fb27SDimitry Andric #if OMPX_TASKGRAPH 163306c3fb27SDimitry Andric taskdata->td_flags.onced = 0; 163406c3fb27SDimitry Andric #endif 16350b57cec5SDimitry Andric KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0); 16360b57cec5SDimitry Andric // start at one because counts current task and children 16370b57cec5SDimitry Andric KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1); 16380b57cec5SDimitry Andric taskdata->td_taskgroup = 16390b57cec5SDimitry Andric parent_task->td_taskgroup; // task inherits taskgroup from the parent task 16400b57cec5SDimitry Andric taskdata->td_dephash = NULL; 16410b57cec5SDimitry Andric taskdata->td_depnode = NULL; 1642bdd1243dSDimitry Andric taskdata->td_target_data.async_handle = NULL; 16430b57cec5SDimitry Andric if (flags->tiedness == TASK_UNTIED) 16440b57cec5SDimitry Andric taskdata->td_last_tied = NULL; // will be set when the task is scheduled 16450b57cec5SDimitry Andric else 16460b57cec5SDimitry Andric taskdata->td_last_tied = taskdata; 16470b57cec5SDimitry Andric taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED; 16480b57cec5SDimitry Andric #if OMPT_SUPPORT 16490b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) 16500b57cec5SDimitry Andric __ompt_task_init(taskdata, gtid); 16510b57cec5SDimitry Andric #endif 1652349cc55cSDimitry Andric // TODO: What would be the balance between the conditions in the function and 1653349cc55cSDimitry Andric // an atomic operation? 
1654349cc55cSDimitry Andric if (__kmp_track_children_task(taskdata)) {
16550b57cec5SDimitry Andric KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
16560b57cec5SDimitry Andric if (parent_task->td_taskgroup)
16570b57cec5SDimitry Andric KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
16580b57cec5SDimitry Andric // Only need to keep track of allocated child tasks for explicit tasks since
16590b57cec5SDimitry Andric // implicit tasks are not deallocated
16600b57cec5SDimitry Andric if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
16610b57cec5SDimitry Andric KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
16620b57cec5SDimitry Andric }
1663e8d8bef9SDimitry Andric if (flags->hidden_helper) {
1664e8d8bef9SDimitry Andric taskdata->td_flags.task_serial = FALSE;
1665e8d8bef9SDimitry Andric // Increment the number of hidden helper tasks to be executed
1666e8d8bef9SDimitry Andric KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
1667e8d8bef9SDimitry Andric }
1668fe6060f1SDimitry Andric }
1669e8d8bef9SDimitry Andric 
167006c3fb27SDimitry Andric #if OMPX_TASKGRAPH
167006c3fb27SDimitry Andric kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
167206c3fb27SDimitry Andric if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
167306c3fb27SDimitry Andric (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
167406c3fb27SDimitry Andric taskdata->is_taskgraph = 1;
167506c3fb27SDimitry Andric taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
167606c3fb27SDimitry Andric taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
167706c3fb27SDimitry Andric }
167806c3fb27SDimitry Andric #endif
16790b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
16800b57cec5SDimitry Andric gtid, taskdata, taskdata->td_parent));
16810b57cec5SDimitry Andric 
16820b57cec5SDimitry Andric return task;
16830b57cec5SDimitry Andric }
16840b57cec5SDimitry Andric 
16850b57cec5SDimitry Andric kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
16860b57cec5SDimitry Andric kmp_int32 flags, size_t sizeof_kmp_task_t,
16870b57cec5SDimitry Andric size_t sizeof_shareds,
16880b57cec5SDimitry Andric kmp_routine_entry_t task_entry) {
16890b57cec5SDimitry Andric kmp_task_t *retval;
16900b57cec5SDimitry Andric kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1691e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid);
16920b57cec5SDimitry Andric input_flags->native = FALSE;
16930b57cec5SDimitry Andric // __kmp_task_alloc() sets up all other runtime flags
16940b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
16950b57cec5SDimitry Andric "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
16960b57cec5SDimitry Andric gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
16970b57cec5SDimitry Andric input_flags->proxy ? "proxy" : "",
16980b57cec5SDimitry Andric input_flags->detachable ?
"detachable" : "", sizeof_kmp_task_t, 16990b57cec5SDimitry Andric sizeof_shareds, task_entry)); 17000b57cec5SDimitry Andric 17010b57cec5SDimitry Andric retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t, 17020b57cec5SDimitry Andric sizeof_shareds, task_entry); 17030b57cec5SDimitry Andric 17040b57cec5SDimitry Andric KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval)); 17050b57cec5SDimitry Andric 17060b57cec5SDimitry Andric return retval; 17070b57cec5SDimitry Andric } 17080b57cec5SDimitry Andric 17090b57cec5SDimitry Andric kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 17100b57cec5SDimitry Andric kmp_int32 flags, 17110b57cec5SDimitry Andric size_t sizeof_kmp_task_t, 17120b57cec5SDimitry Andric size_t sizeof_shareds, 17130b57cec5SDimitry Andric kmp_routine_entry_t task_entry, 17140b57cec5SDimitry Andric kmp_int64 device_id) { 1715e8d8bef9SDimitry Andric auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags); 1716349cc55cSDimitry Andric // target task is untied defined in the specification 17176e75b2fbSDimitry Andric input_flags.tiedness = TASK_UNTIED; 1718*0fca6ea1SDimitry Andric input_flags.target = 1; 1719349cc55cSDimitry Andric 1720349cc55cSDimitry Andric if (__kmp_enable_hidden_helper) 1721349cc55cSDimitry Andric input_flags.hidden_helper = TRUE; 1722e8d8bef9SDimitry Andric 17230b57cec5SDimitry Andric return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t, 17240b57cec5SDimitry Andric sizeof_shareds, task_entry); 17250b57cec5SDimitry Andric } 17260b57cec5SDimitry Andric 17270b57cec5SDimitry Andric /*! 17280b57cec5SDimitry Andric @ingroup TASKING 17290b57cec5SDimitry Andric @param loc_ref location of the original task directive 17300b57cec5SDimitry Andric @param gtid Global Thread ID of encountering thread 17310b57cec5SDimitry Andric @param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new 17320b57cec5SDimitry Andric task'' 17330b57cec5SDimitry Andric @param naffins Number of affinity items 17340b57cec5SDimitry Andric @param affin_list List of affinity items 17350b57cec5SDimitry Andric @return Returns non-zero if registering affinity information was not successful. 17360b57cec5SDimitry Andric Returns 0 if registration was successful 17370b57cec5SDimitry Andric This entry registers the affinity information attached to a task with the task 17380b57cec5SDimitry Andric thunk structure kmp_taskdata_t. 
17390b57cec5SDimitry Andric */ 17400b57cec5SDimitry Andric kmp_int32 17410b57cec5SDimitry Andric __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid, 17420b57cec5SDimitry Andric kmp_task_t *new_task, kmp_int32 naffins, 17430b57cec5SDimitry Andric kmp_task_affinity_info_t *affin_list) { 17440b57cec5SDimitry Andric return 0; 17450b57cec5SDimitry Andric } 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric // __kmp_invoke_task: invoke the specified task 17480b57cec5SDimitry Andric // 17490b57cec5SDimitry Andric // gtid: global thread ID of caller 17500b57cec5SDimitry Andric // task: the task to invoke 17515ffd83dbSDimitry Andric // current_task: the task to resume after task invocation 17525f757f3fSDimitry Andric #ifdef __s390x__ 17535f757f3fSDimitry Andric __attribute__((target("backchain"))) 17545f757f3fSDimitry Andric #endif 17555f757f3fSDimitry Andric static void 17565f757f3fSDimitry Andric __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, 17570b57cec5SDimitry Andric kmp_taskdata_t *current_task) { 17580b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); 17590b57cec5SDimitry Andric kmp_info_t *thread; 17600b57cec5SDimitry Andric int discard = 0 /* false */; 17610b57cec5SDimitry Andric KA_TRACE( 17620b57cec5SDimitry Andric 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", 17630b57cec5SDimitry Andric gtid, taskdata, current_task)); 17640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task); 1765e8d8bef9SDimitry Andric if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY && 1766e8d8bef9SDimitry Andric taskdata->td_flags.complete == 1)) { 17670b57cec5SDimitry Andric // This is a proxy task that was already completed but it needs to run 17680b57cec5SDimitry Andric // its bottom-half finish 17690b57cec5SDimitry Andric KA_TRACE( 17700b57cec5SDimitry Andric 30, 17710b57cec5SDimitry Andric ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n", 17720b57cec5SDimitry Andric gtid, taskdata)); 17730b57cec5SDimitry Andric 17740b57cec5SDimitry Andric __kmp_bottom_half_finish_proxy(gtid, task); 17750b57cec5SDimitry Andric 17760b57cec5SDimitry Andric KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for " 17770b57cec5SDimitry Andric "proxy task %p, resuming task %p\n", 17780b57cec5SDimitry Andric gtid, taskdata, current_task)); 17790b57cec5SDimitry Andric 17800b57cec5SDimitry Andric return; 17810b57cec5SDimitry Andric } 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric #if OMPT_SUPPORT 17840b57cec5SDimitry Andric // For untied tasks, the first task executed only calls __kmpc_omp_task and 17850b57cec5SDimitry Andric // does not execute code. 17860b57cec5SDimitry Andric ompt_thread_info_t oldInfo; 17870b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 17880b57cec5SDimitry Andric // Store the threads states and restore them after the task 17890b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 17900b57cec5SDimitry Andric oldInfo = thread->th.ompt_thread_info; 17910b57cec5SDimitry Andric thread->th.ompt_thread_info.wait_id = 0; 17920b57cec5SDimitry Andric thread->th.ompt_thread_info.state = (thread->th.th_team_serialized) 17930b57cec5SDimitry Andric ? 
ompt_state_work_serial 17940b57cec5SDimitry Andric : ompt_state_work_parallel; 17950b57cec5SDimitry Andric taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric #endif 17980b57cec5SDimitry Andric 17990b57cec5SDimitry Andric // Proxy tasks are not handled by the runtime 18000b57cec5SDimitry Andric if (taskdata->td_flags.proxy != TASK_PROXY) { 18010b57cec5SDimitry Andric __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric 18040b57cec5SDimitry Andric // TODO: cancel tasks if the parallel region has also been cancelled 18050b57cec5SDimitry Andric // TODO: check if this sequence can be hoisted above __kmp_task_start 18060b57cec5SDimitry Andric // if cancellation has been enabled for this run ... 1807e8d8bef9SDimitry Andric if (UNLIKELY(__kmp_omp_cancellation)) { 18080b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 18090b57cec5SDimitry Andric kmp_team_t *this_team = thread->th.th_team; 18100b57cec5SDimitry Andric kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; 18110b57cec5SDimitry Andric if ((taskgroup && taskgroup->cancel_request) || 18120b57cec5SDimitry Andric (this_team->t.t_cancel_request == cancel_parallel)) { 18130b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 18140b57cec5SDimitry Andric ompt_data_t *task_data; 18150b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) { 18160b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); 18170b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_cancel)( 18180b57cec5SDimitry Andric task_data, 18190b57cec5SDimitry Andric ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup 18200b57cec5SDimitry Andric : ompt_cancel_parallel) | 18210b57cec5SDimitry Andric ompt_cancel_discarded_task, 18220b57cec5SDimitry Andric NULL); 18230b57cec5SDimitry Andric } 18240b57cec5SDimitry Andric #endif 18250b57cec5SDimitry Andric KMP_COUNT_BLOCK(TASK_cancelled); 18260b57cec5SDimitry Andric // this task belongs to a task group and we need to cancel it 18270b57cec5SDimitry Andric discard = 1 /* true */; 18280b57cec5SDimitry Andric } 18290b57cec5SDimitry Andric } 18300b57cec5SDimitry Andric 18310b57cec5SDimitry Andric // Invoke the task routine and pass in relevant data. 18320b57cec5SDimitry Andric // Thunks generated by gcc take a different argument list. 
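// The two entry-point conventions dispatched below (cf. kmp_routine_entry_t
// and the GOMP-compat cast):
//   native/GOMP thunks:  void thunk(void *shareds);
//   default thunks:      kmp_int32 entry(kmp_int32 gtid, kmp_task_t *task);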
18330b57cec5SDimitry Andric if (!discard) { 18340b57cec5SDimitry Andric if (taskdata->td_flags.tiedness == TASK_UNTIED) { 18350b57cec5SDimitry Andric taskdata->td_last_tied = current_task->td_last_tied; 18360b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_last_tied); 18370b57cec5SDimitry Andric } 18380b57cec5SDimitry Andric #if KMP_STATS_ENABLED 18390b57cec5SDimitry Andric KMP_COUNT_BLOCK(TASK_executed); 18400b57cec5SDimitry Andric switch (KMP_GET_THREAD_STATE()) { 18410b57cec5SDimitry Andric case FORK_JOIN_BARRIER: 18420b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); 18430b57cec5SDimitry Andric break; 18440b57cec5SDimitry Andric case PLAIN_BARRIER: 18450b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); 18460b57cec5SDimitry Andric break; 18470b57cec5SDimitry Andric case TASKYIELD: 18480b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); 18490b57cec5SDimitry Andric break; 18500b57cec5SDimitry Andric case TASKWAIT: 18510b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); 18520b57cec5SDimitry Andric break; 18530b57cec5SDimitry Andric case TASKGROUP: 18540b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); 18550b57cec5SDimitry Andric break; 18560b57cec5SDimitry Andric default: 18570b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); 18580b57cec5SDimitry Andric break; 18590b57cec5SDimitry Andric } 18600b57cec5SDimitry Andric #endif // KMP_STATS_ENABLED 18610b57cec5SDimitry Andric 18620b57cec5SDimitry Andric // OMPT task begin 18630b57cec5SDimitry Andric #if OMPT_SUPPORT 18640b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) 18650b57cec5SDimitry Andric __ompt_task_start(task, current_task, gtid); 18660b57cec5SDimitry Andric #endif 186781ad6265SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 186881ad6265SDimitry Andric if (UNLIKELY(ompt_enabled.ompt_callback_dispatch && 186981ad6265SDimitry Andric taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) { 187081ad6265SDimitry Andric ompt_data_t instance = ompt_data_none; 187181ad6265SDimitry Andric instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk); 187281ad6265SDimitry Andric ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 187381ad6265SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_dispatch)( 187481ad6265SDimitry Andric &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data), 187581ad6265SDimitry Andric ompt_dispatch_taskloop_chunk, instance); 187681ad6265SDimitry Andric taskdata->ompt_task_info.dispatch_chunk = {0, 0}; 187781ad6265SDimitry Andric } 187881ad6265SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL 18790b57cec5SDimitry Andric 1880fe6060f1SDimitry Andric #if OMPD_SUPPORT 1881fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 1882fe6060f1SDimitry Andric ompd_bp_task_begin(); 1883fe6060f1SDimitry Andric #endif 1884fe6060f1SDimitry Andric 18850b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY 18860b57cec5SDimitry Andric kmp_uint64 cur_time; 18870b57cec5SDimitry Andric kmp_int32 kmp_itt_count_task = 18880b57cec5SDimitry Andric __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial && 18890b57cec5SDimitry Andric current_task->td_flags.tasktype == TASK_IMPLICIT; 18900b57cec5SDimitry Andric if (kmp_itt_count_task) { 18910b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 18920b57cec5SDimitry Andric // Time outer level explicit task on barrier for adjusting imbalance time 18930b57cec5SDimitry Andric if (thread->th.th_bar_arrive_time) 
18940b57cec5SDimitry Andric cur_time = __itt_get_timestamp(); 18950b57cec5SDimitry Andric else 18960b57cec5SDimitry Andric kmp_itt_count_task = 0; // thread is not on a barrier - skip timing 18970b57cec5SDimitry Andric } 1898e8d8bef9SDimitry Andric KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task) 18990b57cec5SDimitry Andric #endif 19000b57cec5SDimitry Andric 1901bdd1243dSDimitry Andric #if ENABLE_LIBOMPTARGET 1902bdd1243dSDimitry Andric if (taskdata->td_target_data.async_handle != NULL) { 1903bdd1243dSDimitry Andric // If we have a valid target async handle, that means that we have already 1904bdd1243dSDimitry Andric // executed the task routine once. We must query for the handle completion 1905bdd1243dSDimitry Andric // instead of re-executing the routine. 190606c3fb27SDimitry Andric KMP_ASSERT(tgt_target_nowait_query); 190706c3fb27SDimitry Andric tgt_target_nowait_query(&taskdata->td_target_data.async_handle); 1908bdd1243dSDimitry Andric } else 1909bdd1243dSDimitry Andric #endif 1910349cc55cSDimitry Andric if (task->routine != NULL) { 19110b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT 19120b57cec5SDimitry Andric if (taskdata->td_flags.native) { 19130b57cec5SDimitry Andric ((void (*)(void *))(*(task->routine)))(task->shareds); 19140b57cec5SDimitry Andric } else 19150b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */ 19160b57cec5SDimitry Andric { 19170b57cec5SDimitry Andric (*(task->routine))(gtid, task); 19180b57cec5SDimitry Andric } 1919349cc55cSDimitry Andric } 19200b57cec5SDimitry Andric KMP_POP_PARTITIONED_TIMER(); 19210b57cec5SDimitry Andric 19220b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY 19230b57cec5SDimitry Andric if (kmp_itt_count_task) { 19240b57cec5SDimitry Andric // Barrier imbalance - adjust arrive time with the task duration 19250b57cec5SDimitry Andric thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time); 19260b57cec5SDimitry Andric } 1927e8d8bef9SDimitry Andric KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed) 1928e8d8bef9SDimitry Andric KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent 19290b57cec5SDimitry Andric #endif 19300b57cec5SDimitry Andric } 19310b57cec5SDimitry Andric 1932fe6060f1SDimitry Andric #if OMPD_SUPPORT 1933fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 1934fe6060f1SDimitry Andric ompd_bp_task_end(); 1935fe6060f1SDimitry Andric #endif 1936fe6060f1SDimitry Andric 19370b57cec5SDimitry Andric // Proxy tasks are not handled by the runtime 19380b57cec5SDimitry Andric if (taskdata->td_flags.proxy != TASK_PROXY) { 19390b57cec5SDimitry Andric #if OMPT_SUPPORT 19400b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 19410b57cec5SDimitry Andric thread->th.ompt_thread_info = oldInfo; 19420b57cec5SDimitry Andric if (taskdata->td_flags.tiedness == TASK_TIED) { 19430b57cec5SDimitry Andric taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; 19440b57cec5SDimitry Andric } 19450b57cec5SDimitry Andric __kmp_task_finish<true>(gtid, task, current_task); 19460b57cec5SDimitry Andric } else 19470b57cec5SDimitry Andric #endif 19480b57cec5SDimitry Andric __kmp_task_finish<false>(gtid, task, current_task); 19490b57cec5SDimitry Andric } 1950*0fca6ea1SDimitry Andric #if OMPT_SUPPORT 1951*0fca6ea1SDimitry Andric else if (UNLIKELY(ompt_enabled.enabled && taskdata->td_flags.target)) { 1952*0fca6ea1SDimitry Andric __ompt_task_finish(task, current_task, ompt_task_switch); 1953*0fca6ea1SDimitry Andric } 1954*0fca6ea1SDimitry Andric #endif 19550b57cec5SDimitry Andric 19560b57cec5SDimitry Andric 
KA_TRACE( 19570b57cec5SDimitry Andric 30, 19580b57cec5SDimitry Andric ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", 19590b57cec5SDimitry Andric gtid, taskdata, current_task)); 19600b57cec5SDimitry Andric return; 19610b57cec5SDimitry Andric } 19620b57cec5SDimitry Andric 19630b57cec5SDimitry Andric // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution 19640b57cec5SDimitry Andric // 19650b57cec5SDimitry Andric // loc_ref: location of original task pragma (ignored) 19660b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread 19670b57cec5SDimitry Andric // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' 19680b57cec5SDimitry Andric // Returns: 19690b57cec5SDimitry Andric // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to 19700b57cec5SDimitry Andric // be resumed later. 19710b57cec5SDimitry Andric // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be 19720b57cec5SDimitry Andric // resumed later. 19730b57cec5SDimitry Andric kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, 19740b57cec5SDimitry Andric kmp_task_t *new_task) { 19750b57cec5SDimitry Andric kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); 19760b57cec5SDimitry Andric 19770b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid, 19780b57cec5SDimitry Andric loc_ref, new_taskdata)); 19790b57cec5SDimitry Andric 19800b57cec5SDimitry Andric #if OMPT_SUPPORT 19810b57cec5SDimitry Andric kmp_taskdata_t *parent; 19820b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 19830b57cec5SDimitry Andric parent = new_taskdata->td_parent; 19840b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_task_create) { 19850b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_task_create)( 1986fe6060f1SDimitry Andric &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame), 1987*0fca6ea1SDimitry Andric &(new_taskdata->ompt_task_info.task_data), 1988*0fca6ea1SDimitry Andric TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, 19890b57cec5SDimitry Andric OMPT_GET_RETURN_ADDRESS(0)); 19900b57cec5SDimitry Andric } 19910b57cec5SDimitry Andric } 19920b57cec5SDimitry Andric #endif 19930b57cec5SDimitry Andric 19940b57cec5SDimitry Andric /* Should we execute the new task or queue it? For now, let's just always try 19950b57cec5SDimitry Andric to queue it. If the queue fills up, then we'll execute it. 
*/ 19960b57cec5SDimitry Andric 19970b57cec5SDimitry Andric if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer 19980b57cec5SDimitry Andric { // Execute this task immediately 19990b57cec5SDimitry Andric kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; 20000b57cec5SDimitry Andric new_taskdata->td_flags.task_serial = 1; 20010b57cec5SDimitry Andric __kmp_invoke_task(gtid, new_task, current_task); 20020b57cec5SDimitry Andric } 20030b57cec5SDimitry Andric 20040b57cec5SDimitry Andric KA_TRACE( 20050b57cec5SDimitry Andric 10, 20060b57cec5SDimitry Andric ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " 20070b57cec5SDimitry Andric "loc=%p task=%p\n", 20080b57cec5SDimitry Andric gtid, loc_ref, new_taskdata)); 20090b57cec5SDimitry Andric 20100b57cec5SDimitry Andric #if OMPT_SUPPORT 20110b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 20120b57cec5SDimitry Andric parent->ompt_task_info.frame.enter_frame = ompt_data_none; 20130b57cec5SDimitry Andric } 20140b57cec5SDimitry Andric #endif 20150b57cec5SDimitry Andric return TASK_CURRENT_NOT_QUEUED; 20160b57cec5SDimitry Andric } 20170b57cec5SDimitry Andric 20180b57cec5SDimitry Andric // __kmp_omp_task: Schedule a non-thread-switchable task for execution 20190b57cec5SDimitry Andric // 20200b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread 20210b57cec5SDimitry Andric // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() 20220b57cec5SDimitry Andric // serialize_immediate: if TRUE then if the task is executed immediately its 20230b57cec5SDimitry Andric // execution will be serialized 20240b57cec5SDimitry Andric // Returns: 20250b57cec5SDimitry Andric // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to 20260b57cec5SDimitry Andric // be resumed later. 20270b57cec5SDimitry Andric // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be 20280b57cec5SDimitry Andric // resumed later.
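// A condensed view of the scheduling decision __kmp_omp_task makes below
// (sketch only; the real code also handles OMPX_TASKGRAPH recording and the
// passive wait policy):
//
//   if (proxy || __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
//     if (serialize_immediate)
//       new_taskdata->td_flags.task_serial = 1;
//     __kmp_invoke_task(gtid, new_task, current_task); // run inline
//   }
//   return TASK_CURRENT_NOT_QUEUED; // encountering task was never suspended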
20290b57cec5SDimitry Andric kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, 20300b57cec5SDimitry Andric bool serialize_immediate) { 20310b57cec5SDimitry Andric kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); 20320b57cec5SDimitry Andric 203306c3fb27SDimitry Andric #if OMPX_TASKGRAPH 203406c3fb27SDimitry Andric if (new_taskdata->is_taskgraph && 203506c3fb27SDimitry Andric __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) { 203606c3fb27SDimitry Andric kmp_tdg_info_t *tdg = new_taskdata->tdg; 203706c3fb27SDimitry Andric // extend the record_map if needed 203806c3fb27SDimitry Andric if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) { 203906c3fb27SDimitry Andric __kmp_acquire_bootstrap_lock(&tdg->graph_lock); 204006c3fb27SDimitry Andric // map_size could have been updated by another thread in case of a 204106c3fb27SDimitry Andric // recursive taskloop 204206c3fb27SDimitry Andric if (new_taskdata->td_task_id >= tdg->map_size) { 204306c3fb27SDimitry Andric kmp_uint old_size = tdg->map_size; 204406c3fb27SDimitry Andric kmp_uint new_size = old_size * 2; 204506c3fb27SDimitry Andric kmp_node_info_t *old_record = tdg->record_map; 204606c3fb27SDimitry Andric kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate( 204706c3fb27SDimitry Andric new_size * sizeof(kmp_node_info_t)); 204806c3fb27SDimitry Andric 204906c3fb27SDimitry Andric KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t)); 205006c3fb27SDimitry Andric tdg->record_map = new_record; 205106c3fb27SDimitry Andric 205206c3fb27SDimitry Andric __kmp_free(old_record); 205306c3fb27SDimitry Andric 205406c3fb27SDimitry Andric for (kmp_int i = old_size; i < new_size; i++) { 205506c3fb27SDimitry Andric kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate( 205606c3fb27SDimitry Andric __kmp_successors_size * sizeof(kmp_int32)); 205706c3fb27SDimitry Andric new_record[i].task = nullptr; 205806c3fb27SDimitry Andric new_record[i].successors = successorsList; 205906c3fb27SDimitry Andric new_record[i].nsuccessors = 0; 206006c3fb27SDimitry Andric new_record[i].npredecessors = 0; 206106c3fb27SDimitry Andric new_record[i].successors_size = __kmp_successors_size; 206206c3fb27SDimitry Andric KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0); 206306c3fb27SDimitry Andric } 206406c3fb27SDimitry Andric // update the size at the end, so that other threads do not use 206506c3fb27SDimitry Andric // old_record while map_size is already updated 206606c3fb27SDimitry Andric tdg->map_size = new_size; 206706c3fb27SDimitry Andric } 206806c3fb27SDimitry Andric __kmp_release_bootstrap_lock(&tdg->graph_lock); 206906c3fb27SDimitry Andric } 207006c3fb27SDimitry Andric // record a task 207106c3fb27SDimitry Andric if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) { 207206c3fb27SDimitry Andric tdg->record_map[new_taskdata->td_task_id].task = new_task; 207306c3fb27SDimitry Andric tdg->record_map[new_taskdata->td_task_id].parent_task = 207406c3fb27SDimitry Andric new_taskdata->td_parent; 207506c3fb27SDimitry Andric KMP_ATOMIC_INC(&tdg->num_tasks); 207606c3fb27SDimitry Andric } 207706c3fb27SDimitry Andric } 207806c3fb27SDimitry Andric #endif 207906c3fb27SDimitry Andric 20800b57cec5SDimitry Andric /* Should we execute the new task or queue it? For now, let's just always try 20810b57cec5SDimitry Andric to queue it. If the queue fills up, then we'll execute it.
*/ 20820b57cec5SDimitry Andric if (new_taskdata->td_flags.proxy == TASK_PROXY || 20830b57cec5SDimitry Andric __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer 20840b57cec5SDimitry Andric { // Execute this task immediately 20850b57cec5SDimitry Andric kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; 20860b57cec5SDimitry Andric if (serialize_immediate) 20870b57cec5SDimitry Andric new_taskdata->td_flags.task_serial = 1; 20880b57cec5SDimitry Andric __kmp_invoke_task(gtid, new_task, current_task); 208981ad6265SDimitry Andric } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && 209081ad6265SDimitry Andric __kmp_wpolicy_passive) { 209181ad6265SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 209281ad6265SDimitry Andric kmp_team_t *team = this_thr->th.th_team; 209381ad6265SDimitry Andric kmp_int32 nthreads = this_thr->th.th_team_nproc; 209481ad6265SDimitry Andric for (int i = 0; i < nthreads; ++i) { 209581ad6265SDimitry Andric kmp_info_t *thread = team->t.t_threads[i]; 209681ad6265SDimitry Andric if (thread == this_thr) 209781ad6265SDimitry Andric continue; 209881ad6265SDimitry Andric if (thread->th.th_sleep_loc != NULL) { 209981ad6265SDimitry Andric __kmp_null_resume_wrapper(thread); 210081ad6265SDimitry Andric break; // wake one thread at a time 21010b57cec5SDimitry Andric } 210281ad6265SDimitry Andric } 210381ad6265SDimitry Andric } 21040b57cec5SDimitry Andric return TASK_CURRENT_NOT_QUEUED; 21050b57cec5SDimitry Andric } 21060b57cec5SDimitry Andric 21070b57cec5SDimitry Andric // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a 21080b57cec5SDimitry Andric // non-thread-switchable task from the parent thread only! 21090b57cec5SDimitry Andric // 21100b57cec5SDimitry Andric // loc_ref: location of original task pragma (ignored) 21110b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread 21120b57cec5SDimitry Andric // new_task: non-thread-switchable task thunk allocated by 21130b57cec5SDimitry Andric // __kmp_omp_task_alloc() 21140b57cec5SDimitry Andric // Returns: 21150b57cec5SDimitry Andric // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to 21160b57cec5SDimitry Andric // be resumed later. 21170b57cec5SDimitry Andric // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be 21180b57cec5SDimitry Andric // resumed later.
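// For reference, a compiler typically lowers
//
//   #pragma omp task
//   { body(); }
//
// into roughly the following sequence (simplified sketch; the flag bits and
// exact size arguments vary by compiler and by the clauses on the task):
//
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/1,
//                                         sizeof_kmp_task_t, sizeof_shareds,
//                                         &task_entry);
//   /* ... copy captured variables into t->shareds ... */
//   __kmpc_omp_task(&loc, gtid, t);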
21190b57cec5SDimitry Andric kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, 21200b57cec5SDimitry Andric kmp_task_t *new_task) { 21210b57cec5SDimitry Andric kmp_int32 res; 21220b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric #if KMP_DEBUG || OMPT_SUPPORT 21250b57cec5SDimitry Andric kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); 21260b57cec5SDimitry Andric #endif 21270b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, 21280b57cec5SDimitry Andric new_taskdata)); 2129e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 21300b57cec5SDimitry Andric 21310b57cec5SDimitry Andric #if OMPT_SUPPORT 21320b57cec5SDimitry Andric kmp_taskdata_t *parent = NULL; 21330b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 21340b57cec5SDimitry Andric if (!new_taskdata->td_flags.started) { 21350b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 21360b57cec5SDimitry Andric parent = new_taskdata->td_parent; 21370b57cec5SDimitry Andric if (!parent->ompt_task_info.frame.enter_frame.ptr) { 2138fe6060f1SDimitry Andric parent->ompt_task_info.frame.enter_frame.ptr = 2139fe6060f1SDimitry Andric OMPT_GET_FRAME_ADDRESS(0); 21400b57cec5SDimitry Andric } 21410b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_task_create) { 21420b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_task_create)( 2143fe6060f1SDimitry Andric &(parent->ompt_task_info.task_data), 2144fe6060f1SDimitry Andric &(parent->ompt_task_info.frame), 21450b57cec5SDimitry Andric &(new_taskdata->ompt_task_info.task_data), 2146*0fca6ea1SDimitry Andric TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, 21470b57cec5SDimitry Andric OMPT_LOAD_RETURN_ADDRESS(gtid)); 21480b57cec5SDimitry Andric } 21490b57cec5SDimitry Andric } else { 21500b57cec5SDimitry Andric // We are scheduling the continuation of an UNTIED task. 21510b57cec5SDimitry Andric // Scheduling back to the parent task. 
21520b57cec5SDimitry Andric __ompt_task_finish(new_task, 21530b57cec5SDimitry Andric new_taskdata->ompt_task_info.scheduling_parent, 21540b57cec5SDimitry Andric ompt_task_switch); 21550b57cec5SDimitry Andric new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; 21560b57cec5SDimitry Andric } 21570b57cec5SDimitry Andric } 21580b57cec5SDimitry Andric #endif 21590b57cec5SDimitry Andric 21600b57cec5SDimitry Andric res = __kmp_omp_task(gtid, new_task, true); 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " 21630b57cec5SDimitry Andric "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", 21640b57cec5SDimitry Andric gtid, loc_ref, new_taskdata)); 21650b57cec5SDimitry Andric #if OMPT_SUPPORT 21660b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { 21670b57cec5SDimitry Andric parent->ompt_task_info.frame.enter_frame = ompt_data_none; 21680b57cec5SDimitry Andric } 21690b57cec5SDimitry Andric #endif 21700b57cec5SDimitry Andric return res; 21710b57cec5SDimitry Andric } 21720b57cec5SDimitry Andric 21730b57cec5SDimitry Andric // __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule 21740b57cec5SDimitry Andric // a taskloop task with the correct OMPT return address 21750b57cec5SDimitry Andric // 21760b57cec5SDimitry Andric // loc_ref: location of original task pragma (ignored) 21770b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread 21780b57cec5SDimitry Andric // new_task: non-thread-switchable task thunk allocated by 21790b57cec5SDimitry Andric // __kmp_omp_task_alloc() 21800b57cec5SDimitry Andric // codeptr_ra: return address for OMPT callback 21810b57cec5SDimitry Andric // Returns: 21820b57cec5SDimitry Andric // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to 21830b57cec5SDimitry Andric // be resumed later. 21840b57cec5SDimitry Andric // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be 21850b57cec5SDimitry Andric // resumed later. 
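// Unlike __kmpc_omp_task above, the taskloop machinery passes the return
// address of the original construct through codeptr_ra, so that
// ompt_callback_task_create reports the user call site rather than this
// internal one. Illustrative call shape (not a fixed API contract):
//
//   __kmp_omp_taskloop_task(NULL, gtid, next_task, codeptr_ra);
//
// codeptr_ra is then handed to the task_create callback unchanged.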
21860b57cec5SDimitry Andric kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, 21870b57cec5SDimitry Andric kmp_task_t *new_task, void *codeptr_ra) { 21880b57cec5SDimitry Andric kmp_int32 res; 21890b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); 21900b57cec5SDimitry Andric 21910b57cec5SDimitry Andric #if KMP_DEBUG || OMPT_SUPPORT 21920b57cec5SDimitry Andric kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); 21930b57cec5SDimitry Andric #endif 21940b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_omp_taskloop_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, 21950b57cec5SDimitry Andric new_taskdata)); 21960b57cec5SDimitry Andric 21970b57cec5SDimitry Andric #if OMPT_SUPPORT 21980b57cec5SDimitry Andric kmp_taskdata_t *parent = NULL; 21990b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { 22000b57cec5SDimitry Andric parent = new_taskdata->td_parent; 22010b57cec5SDimitry Andric if (!parent->ompt_task_info.frame.enter_frame.ptr) 22020b57cec5SDimitry Andric parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 22030b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_task_create) { 22040b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_task_create)( 2205fe6060f1SDimitry Andric &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame), 22060b57cec5SDimitry Andric &(new_taskdata->ompt_task_info.task_data), 2207*0fca6ea1SDimitry Andric TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, codeptr_ra); 22080b57cec5SDimitry Andric } 22090b57cec5SDimitry Andric } 22100b57cec5SDimitry Andric #endif 22110b57cec5SDimitry Andric 22120b57cec5SDimitry Andric res = __kmp_omp_task(gtid, new_task, true); 22130b57cec5SDimitry Andric 22140b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_omp_taskloop_task(exit): T#%d returning " 22150b57cec5SDimitry Andric "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", 22160b57cec5SDimitry Andric gtid, loc_ref, new_taskdata)); 22170b57cec5SDimitry Andric #if OMPT_SUPPORT 22180b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { 22190b57cec5SDimitry Andric parent->ompt_task_info.frame.enter_frame = ompt_data_none; 22200b57cec5SDimitry Andric } 22210b57cec5SDimitry Andric #endif 22220b57cec5SDimitry Andric return res; 22230b57cec5SDimitry Andric } 22240b57cec5SDimitry Andric 22250b57cec5SDimitry Andric template <bool ompt> 22260b57cec5SDimitry Andric static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, 22270b57cec5SDimitry Andric void *frame_address, 22280b57cec5SDimitry Andric void *return_address) { 2229fe6060f1SDimitry Andric kmp_taskdata_t *taskdata = nullptr; 22300b57cec5SDimitry Andric kmp_info_t *thread; 22310b57cec5SDimitry Andric int thread_finished = FALSE; 22320b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); 22330b57cec5SDimitry Andric 22340b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); 2235fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 22360b57cec5SDimitry Andric 22370b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 22380b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 22390b57cec5SDimitry Andric taskdata = thread->th.th_current_task; 22400b57cec5SDimitry Andric 22410b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 22420b57cec5SDimitry Andric ompt_data_t *my_task_data; 22430b57cec5SDimitry Andric ompt_data_t *my_parallel_data; 22440b57cec5SDimitry Andric 22450b57cec5SDimitry Andric if (ompt) { 22460b57cec5SDimitry
Andric my_task_data = &(taskdata->ompt_task_info.task_data); 22470b57cec5SDimitry Andric my_parallel_data = OMPT_CUR_TEAM_DATA(thread); 22480b57cec5SDimitry Andric 22490b57cec5SDimitry Andric taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address; 22500b57cec5SDimitry Andric 22510b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region) { 22520b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region)( 22530b57cec5SDimitry Andric ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, 22540b57cec5SDimitry Andric my_task_data, return_address); 22550b57cec5SDimitry Andric } 22560b57cec5SDimitry Andric 22570b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region_wait) { 22580b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( 22590b57cec5SDimitry Andric ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, 22600b57cec5SDimitry Andric my_task_data, return_address); 22610b57cec5SDimitry Andric } 22620b57cec5SDimitry Andric } 22630b57cec5SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL 22640b57cec5SDimitry Andric 22650b57cec5SDimitry Andric // Debugger: The taskwait is active. Store the location and the thread that 22660b57cec5SDimitry Andric // encountered the taskwait. 22670b57cec5SDimitry Andric #if USE_ITT_BUILD 22680b57cec5SDimitry Andric // Note: These values are used by ITT events as well. 22690b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22700b57cec5SDimitry Andric taskdata->td_taskwait_counter += 1; 22710b57cec5SDimitry Andric taskdata->td_taskwait_ident = loc_ref; 22720b57cec5SDimitry Andric taskdata->td_taskwait_thread = gtid + 1; 22730b57cec5SDimitry Andric 22740b57cec5SDimitry Andric #if USE_ITT_BUILD 2275fe6060f1SDimitry Andric void *itt_sync_obj = NULL; 2276fe6060f1SDimitry Andric #if USE_ITT_NOTIFY 2277fe6060f1SDimitry Andric KMP_ITT_TASKWAIT_STARTING(itt_sync_obj); 2278fe6060f1SDimitry Andric #endif /* USE_ITT_NOTIFY */ 22790b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22800b57cec5SDimitry Andric 22810b57cec5SDimitry Andric bool must_wait = 22820b57cec5SDimitry Andric !taskdata->td_flags.team_serial && !taskdata->td_flags.final; 22830b57cec5SDimitry Andric 22840b57cec5SDimitry Andric must_wait = must_wait || (thread->th.th_task_team != NULL && 22850b57cec5SDimitry Andric thread->th.th_task_team->tt.tt_found_proxy_tasks); 2286e8d8bef9SDimitry Andric // If a hidden helper thread is encountered, we must enable wait here.
2287e8d8bef9SDimitry Andric must_wait = 2288e8d8bef9SDimitry Andric must_wait || 2289e8d8bef9SDimitry Andric (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL && 2290e8d8bef9SDimitry Andric thread->th.th_task_team->tt.tt_hidden_helper_task_encountered); 2291e8d8bef9SDimitry Andric 22920b57cec5SDimitry Andric if (must_wait) { 2293e8d8bef9SDimitry Andric kmp_flag_32<false, false> flag( 2294e8d8bef9SDimitry Andric RCAST(std::atomic<kmp_uint32> *, 22950b57cec5SDimitry Andric &(taskdata->td_incomplete_child_tasks)), 22960b57cec5SDimitry Andric 0U); 22970b57cec5SDimitry Andric while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) { 22980b57cec5SDimitry Andric flag.execute_tasks(thread, gtid, FALSE, 22990b57cec5SDimitry Andric &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), 23000b57cec5SDimitry Andric __kmp_task_stealing_constraint); 23010b57cec5SDimitry Andric } 23020b57cec5SDimitry Andric } 23030b57cec5SDimitry Andric #if USE_ITT_BUILD 2304fe6060f1SDimitry Andric KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj); 2305e8d8bef9SDimitry Andric KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children 23060b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 23070b57cec5SDimitry Andric 23080b57cec5SDimitry Andric // Debugger: The taskwait is completed. Location remains, but thread is 23090b57cec5SDimitry Andric // negated. 23100b57cec5SDimitry Andric taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; 23110b57cec5SDimitry Andric 23120b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 23130b57cec5SDimitry Andric if (ompt) { 23140b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region_wait) { 23150b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( 23160b57cec5SDimitry Andric ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, 23170b57cec5SDimitry Andric my_task_data, return_address); 23180b57cec5SDimitry Andric } 23190b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region) { 23200b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region)( 23210b57cec5SDimitry Andric ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, 23220b57cec5SDimitry Andric my_task_data, return_address); 23230b57cec5SDimitry Andric } 23240b57cec5SDimitry Andric taskdata->ompt_task_info.frame.enter_frame = ompt_data_none; 23250b57cec5SDimitry Andric } 23260b57cec5SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL 23270b57cec5SDimitry Andric } 23280b57cec5SDimitry Andric 23290b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " 23300b57cec5SDimitry Andric "returning TASK_CURRENT_NOT_QUEUED\n", 23310b57cec5SDimitry Andric gtid, taskdata)); 23320b57cec5SDimitry Andric 23330b57cec5SDimitry Andric return TASK_CURRENT_NOT_QUEUED; 23340b57cec5SDimitry Andric } 23350b57cec5SDimitry Andric 23360b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 23370b57cec5SDimitry Andric OMPT_NOINLINE 23380b57cec5SDimitry Andric static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid, 23390b57cec5SDimitry Andric void *frame_address, 23400b57cec5SDimitry Andric void *return_address) { 23410b57cec5SDimitry Andric return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address, 23420b57cec5SDimitry Andric return_address); 23430b57cec5SDimitry Andric } 23440b57cec5SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL 23450b57cec5SDimitry Andric 23460b57cec5SDimitry Andric // __kmpc_omp_taskwait: Wait until all tasks generated by the current 
task are 23470b57cec5SDimitry Andric // complete 23480b57cec5SDimitry Andric kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { 23490b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 23500b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 23510b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 23520b57cec5SDimitry Andric return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), 23530b57cec5SDimitry Andric OMPT_LOAD_RETURN_ADDRESS(gtid)); 23540b57cec5SDimitry Andric } 23550b57cec5SDimitry Andric #endif 23560b57cec5SDimitry Andric return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL); 23570b57cec5SDimitry Andric } 23580b57cec5SDimitry Andric 23590b57cec5SDimitry Andric // __kmpc_omp_taskyield: switch to a different task 23600b57cec5SDimitry Andric kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { 2361fe6060f1SDimitry Andric kmp_taskdata_t *taskdata = NULL; 23620b57cec5SDimitry Andric kmp_info_t *thread; 23630b57cec5SDimitry Andric int thread_finished = FALSE; 23640b57cec5SDimitry Andric 23650b57cec5SDimitry Andric KMP_COUNT_BLOCK(OMP_TASKYIELD); 23660b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(TASKYIELD); 23670b57cec5SDimitry Andric 23680b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", 23690b57cec5SDimitry Andric gtid, loc_ref, end_part)); 2370e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 23710b57cec5SDimitry Andric 23720b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) { 23730b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 23740b57cec5SDimitry Andric taskdata = thread->th.th_current_task; 23750b57cec5SDimitry Andric // Should we model this as a task wait or not? 23760b57cec5SDimitry Andric // Debugger: The taskwait is active. Store the location and the thread that 23770b57cec5SDimitry Andric // encountered the taskwait. 23780b57cec5SDimitry Andric #if USE_ITT_BUILD 23790b57cec5SDimitry Andric // Note: These values are used by ITT events as well.
23800b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 23810b57cec5SDimitry Andric taskdata->td_taskwait_counter += 1; 23820b57cec5SDimitry Andric taskdata->td_taskwait_ident = loc_ref; 23830b57cec5SDimitry Andric taskdata->td_taskwait_thread = gtid + 1; 23840b57cec5SDimitry Andric 23850b57cec5SDimitry Andric #if USE_ITT_BUILD 2386fe6060f1SDimitry Andric void *itt_sync_obj = NULL; 2387fe6060f1SDimitry Andric #if USE_ITT_NOTIFY 2388fe6060f1SDimitry Andric KMP_ITT_TASKWAIT_STARTING(itt_sync_obj); 2389fe6060f1SDimitry Andric #endif /* USE_ITT_NOTIFY */ 23900b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 23910b57cec5SDimitry Andric if (!taskdata->td_flags.team_serial) { 23920b57cec5SDimitry Andric kmp_task_team_t *task_team = thread->th.th_task_team; 23930b57cec5SDimitry Andric if (task_team != NULL) { 23940b57cec5SDimitry Andric if (KMP_TASKING_ENABLED(task_team)) { 23950b57cec5SDimitry Andric #if OMPT_SUPPORT 23960b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) 23970b57cec5SDimitry Andric thread->th.ompt_thread_info.ompt_task_yielded = 1; 23980b57cec5SDimitry Andric #endif 23990b57cec5SDimitry Andric __kmp_execute_tasks_32( 2400e8d8bef9SDimitry Andric thread, gtid, (kmp_flag_32<> *)NULL, FALSE, 24010b57cec5SDimitry Andric &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), 24020b57cec5SDimitry Andric __kmp_task_stealing_constraint); 24030b57cec5SDimitry Andric #if OMPT_SUPPORT 24040b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) 24050b57cec5SDimitry Andric thread->th.ompt_thread_info.ompt_task_yielded = 0; 24060b57cec5SDimitry Andric #endif 24070b57cec5SDimitry Andric } 24080b57cec5SDimitry Andric } 24090b57cec5SDimitry Andric } 24100b57cec5SDimitry Andric #if USE_ITT_BUILD 2411fe6060f1SDimitry Andric KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj); 24120b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 24130b57cec5SDimitry Andric 24140b57cec5SDimitry Andric // Debugger: The taskwait is completed. Location remains, but thread is 24150b57cec5SDimitry Andric // negated. 24160b57cec5SDimitry Andric taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; 24170b57cec5SDimitry Andric } 24180b57cec5SDimitry Andric 24190b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " 24200b57cec5SDimitry Andric "returning TASK_CURRENT_NOT_QUEUED\n", 24210b57cec5SDimitry Andric gtid, taskdata)); 24220b57cec5SDimitry Andric 24230b57cec5SDimitry Andric return TASK_CURRENT_NOT_QUEUED; 24240b57cec5SDimitry Andric } 24250b57cec5SDimitry Andric 24260b57cec5SDimitry Andric // Task Reduction implementation 24270b57cec5SDimitry Andric // 24280b57cec5SDimitry Andric // Note: initial implementation didn't take into account the possibility 24290b57cec5SDimitry Andric // to specify omp_orig for initializer of the UDR (user defined reduction). 24300b57cec5SDimitry Andric // Corrected implementation takes into account the omp_orig object. 24310b57cec5SDimitry Andric // Compiler is free to use old implementation if omp_orig is not specified. 24320b57cec5SDimitry Andric 24330b57cec5SDimitry Andric /*! 24340b57cec5SDimitry Andric @ingroup BASIC_TYPES 24350b57cec5SDimitry Andric @{ 24360b57cec5SDimitry Andric */ 24370b57cec5SDimitry Andric 24380b57cec5SDimitry Andric /*! 24390b57cec5SDimitry Andric Flags for special info per task reduction item. 24400b57cec5SDimitry Andric */ 24410b57cec5SDimitry Andric typedef struct kmp_taskred_flags { 244281ad6265SDimitry Andric /*! 1 - use lazy alloc/init (e.g. 
big objects, num tasks < num threads) */ 24430b57cec5SDimitry Andric unsigned lazy_priv : 1; 24440b57cec5SDimitry Andric unsigned reserved31 : 31; 24450b57cec5SDimitry Andric } kmp_taskred_flags_t; 24460b57cec5SDimitry Andric 24470b57cec5SDimitry Andric /*! 24480b57cec5SDimitry Andric Internal struct for reduction data item related info set up by compiler. 24490b57cec5SDimitry Andric */ 24500b57cec5SDimitry Andric typedef struct kmp_task_red_input { 24510b57cec5SDimitry Andric void *reduce_shar; /**< shared between tasks item to reduce into */ 24520b57cec5SDimitry Andric size_t reduce_size; /**< size of data item in bytes */ 24530b57cec5SDimitry Andric // three compiler-generated routines (init, fini are optional): 24540b57cec5SDimitry Andric void *reduce_init; /**< data initialization routine (single parameter) */ 24550b57cec5SDimitry Andric void *reduce_fini; /**< data finalization routine */ 24560b57cec5SDimitry Andric void *reduce_comb; /**< data combiner routine */ 24570b57cec5SDimitry Andric kmp_taskred_flags_t flags; /**< flags for additional info from compiler */ 24580b57cec5SDimitry Andric } kmp_task_red_input_t; 24590b57cec5SDimitry Andric 24600b57cec5SDimitry Andric /*! 24610b57cec5SDimitry Andric Internal struct for reduction data item related info saved by the library. 24620b57cec5SDimitry Andric */ 24630b57cec5SDimitry Andric typedef struct kmp_taskred_data { 24640b57cec5SDimitry Andric void *reduce_shar; /**< shared between tasks item to reduce into */ 24650b57cec5SDimitry Andric size_t reduce_size; /**< size of data item */ 24660b57cec5SDimitry Andric kmp_taskred_flags_t flags; /**< flags for additional info from compiler */ 24670b57cec5SDimitry Andric void *reduce_priv; /**< array of thread specific items */ 24680b57cec5SDimitry Andric void *reduce_pend; /**< end of private data for faster comparison op */ 24690b57cec5SDimitry Andric // three compiler-generated routines (init, fini are optional): 24700b57cec5SDimitry Andric void *reduce_comb; /**< data combiner routine */ 24710b57cec5SDimitry Andric void *reduce_init; /**< data initialization routine (two parameters) */ 24720b57cec5SDimitry Andric void *reduce_fini; /**< data finalization routine */ 24730b57cec5SDimitry Andric void *reduce_orig; /**< original item (can be used in UDR initializer) */ 24740b57cec5SDimitry Andric } kmp_taskred_data_t; 24750b57cec5SDimitry Andric 24760b57cec5SDimitry Andric /*! 24770b57cec5SDimitry Andric Internal struct for reduction data item related info set up by compiler. 24780b57cec5SDimitry Andric 24790b57cec5SDimitry Andric New interface: added reduce_orig field to provide omp_orig for UDR initializer. 
24800b57cec5SDimitry Andric */ 24810b57cec5SDimitry Andric typedef struct kmp_taskred_input { 24820b57cec5SDimitry Andric void *reduce_shar; /**< shared between tasks item to reduce into */ 24830b57cec5SDimitry Andric void *reduce_orig; /**< original reduction item used for initialization */ 24840b57cec5SDimitry Andric size_t reduce_size; /**< size of data item */ 24850b57cec5SDimitry Andric // three compiler-generated routines (init, fini are optional): 24860b57cec5SDimitry Andric void *reduce_init; /**< data initialization routine (two parameters) */ 24870b57cec5SDimitry Andric void *reduce_fini; /**< data finalization routine */ 24880b57cec5SDimitry Andric void *reduce_comb; /**< data combiner routine */ 24890b57cec5SDimitry Andric kmp_taskred_flags_t flags; /**< flags for additional info from compiler */ 24900b57cec5SDimitry Andric } kmp_taskred_input_t; 24910b57cec5SDimitry Andric /*! 24920b57cec5SDimitry Andric @} 24930b57cec5SDimitry Andric */ 24940b57cec5SDimitry Andric 24950b57cec5SDimitry Andric template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src); 24960b57cec5SDimitry Andric template <> 24970b57cec5SDimitry Andric void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item, 24980b57cec5SDimitry Andric kmp_task_red_input_t &src) { 24990b57cec5SDimitry Andric item.reduce_orig = NULL; 25000b57cec5SDimitry Andric } 25010b57cec5SDimitry Andric template <> 25020b57cec5SDimitry Andric void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item, 25030b57cec5SDimitry Andric kmp_taskred_input_t &src) { 25040b57cec5SDimitry Andric if (src.reduce_orig != NULL) { 25050b57cec5SDimitry Andric item.reduce_orig = src.reduce_orig; 25060b57cec5SDimitry Andric } else { 25070b57cec5SDimitry Andric item.reduce_orig = src.reduce_shar; 25080b57cec5SDimitry Andric } // non-NULL reduce_orig means new interface used 25090b57cec5SDimitry Andric } 25100b57cec5SDimitry Andric 2511e8d8bef9SDimitry Andric template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j); 25120b57cec5SDimitry Andric template <> 25130b57cec5SDimitry Andric void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item, 2514e8d8bef9SDimitry Andric size_t offset) { 25150b57cec5SDimitry Andric ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset); 25160b57cec5SDimitry Andric } 25170b57cec5SDimitry Andric template <> 25180b57cec5SDimitry Andric void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item, 2519e8d8bef9SDimitry Andric size_t offset) { 25200b57cec5SDimitry Andric ((void (*)(void *, void *))item.reduce_init)( 25210b57cec5SDimitry Andric (char *)(item.reduce_priv) + offset, item.reduce_orig); 25220b57cec5SDimitry Andric } 25230b57cec5SDimitry Andric 25240b57cec5SDimitry Andric template <typename T> 25250b57cec5SDimitry Andric void *__kmp_task_reduction_init(int gtid, int num, T *data) { 2526e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 25270b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 25280b57cec5SDimitry Andric kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup; 2529e8d8bef9SDimitry Andric kmp_uint32 nth = thread->th.th_team_nproc; 25300b57cec5SDimitry Andric kmp_taskred_data_t *arr; 25310b57cec5SDimitry Andric 25320b57cec5SDimitry Andric // check input data just in case 25330b57cec5SDimitry Andric KMP_ASSERT(tg != NULL); 25340b57cec5SDimitry Andric KMP_ASSERT(data != NULL); 25350b57cec5SDimitry Andric KMP_ASSERT(num > 0); 25365f757f3fSDimitry Andric if (nth == 1 && 
!__kmp_enable_hidden_helper) { 25370b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n", 25380b57cec5SDimitry Andric gtid, tg)); 25390b57cec5SDimitry Andric return (void *)tg; 25400b57cec5SDimitry Andric } 25410b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n", 25420b57cec5SDimitry Andric gtid, tg, num)); 25430b57cec5SDimitry Andric arr = (kmp_taskred_data_t *)__kmp_thread_malloc( 25440b57cec5SDimitry Andric thread, num * sizeof(kmp_taskred_data_t)); 25450b57cec5SDimitry Andric for (int i = 0; i < num; ++i) { 25460b57cec5SDimitry Andric size_t size = data[i].reduce_size - 1; 25470b57cec5SDimitry Andric // round the size up to cache line per thread-specific item 25480b57cec5SDimitry Andric size += CACHE_LINE - size % CACHE_LINE; 25490b57cec5SDimitry Andric KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory 25500b57cec5SDimitry Andric arr[i].reduce_shar = data[i].reduce_shar; 25510b57cec5SDimitry Andric arr[i].reduce_size = size; 25520b57cec5SDimitry Andric arr[i].flags = data[i].flags; 25530b57cec5SDimitry Andric arr[i].reduce_comb = data[i].reduce_comb; 25540b57cec5SDimitry Andric arr[i].reduce_init = data[i].reduce_init; 25550b57cec5SDimitry Andric arr[i].reduce_fini = data[i].reduce_fini; 25560b57cec5SDimitry Andric __kmp_assign_orig<T>(arr[i], data[i]); 25570b57cec5SDimitry Andric if (!arr[i].flags.lazy_priv) { 25580b57cec5SDimitry Andric // allocate cache-line aligned block and fill it with zeros 25590b57cec5SDimitry Andric arr[i].reduce_priv = __kmp_allocate(nth * size); 25600b57cec5SDimitry Andric arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size; 25610b57cec5SDimitry Andric if (arr[i].reduce_init != NULL) { 25620b57cec5SDimitry Andric // initialize all thread-specific items 2563e8d8bef9SDimitry Andric for (size_t j = 0; j < nth; ++j) { 25640b57cec5SDimitry Andric __kmp_call_init<T>(arr[i], j * size); 25650b57cec5SDimitry Andric } 25660b57cec5SDimitry Andric } 25670b57cec5SDimitry Andric } else { 25680b57cec5SDimitry Andric // only allocate space for pointers now, 25690b57cec5SDimitry Andric // objects will be lazily allocated/initialized if/when requested 25700b57cec5SDimitry Andric // note that __kmp_allocate zeroes the allocated memory 25710b57cec5SDimitry Andric arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *)); 25720b57cec5SDimitry Andric } 25730b57cec5SDimitry Andric } 25740b57cec5SDimitry Andric tg->reduce_data = (void *)arr; 25750b57cec5SDimitry Andric tg->reduce_num_data = num; 25760b57cec5SDimitry Andric return (void *)tg; 25770b57cec5SDimitry Andric } 25780b57cec5SDimitry Andric 25790b57cec5SDimitry Andric /*! 25800b57cec5SDimitry Andric @ingroup TASKING 25810b57cec5SDimitry Andric @param gtid Global thread ID 25820b57cec5SDimitry Andric @param num Number of data items to reduce 25830b57cec5SDimitry Andric @param data Array of data for reduction 25840b57cec5SDimitry Andric @return The taskgroup identifier 25850b57cec5SDimitry Andric 25860b57cec5SDimitry Andric Initialize task reduction for the taskgroup. 25870b57cec5SDimitry Andric 25880b57cec5SDimitry Andric Note: this entry supposes the optional compiler-generated initializer routine 25890b57cec5SDimitry Andric has single parameter - pointer to object to be initialized. That means 25900b57cec5SDimitry Andric the reduction either does not use omp_orig object, or the omp_orig is accessible 25910b57cec5SDimitry Andric without help of the runtime library. 
25920b57cec5SDimitry Andric */ 25930b57cec5SDimitry Andric void *__kmpc_task_reduction_init(int gtid, int num, void *data) { 259406c3fb27SDimitry Andric #if OMPX_TASKGRAPH 259506c3fb27SDimitry Andric kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx); 259606c3fb27SDimitry Andric if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) { 259706c3fb27SDimitry Andric kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx]; 259806c3fb27SDimitry Andric this_tdg->rec_taskred_data = 259906c3fb27SDimitry Andric __kmp_allocate(sizeof(kmp_task_red_input_t) * num); 260006c3fb27SDimitry Andric this_tdg->rec_num_taskred = num; 260106c3fb27SDimitry Andric KMP_MEMCPY(this_tdg->rec_taskred_data, data, 260206c3fb27SDimitry Andric sizeof(kmp_task_red_input_t) * num); 260306c3fb27SDimitry Andric } 260406c3fb27SDimitry Andric #endif 26050b57cec5SDimitry Andric return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data); 26060b57cec5SDimitry Andric } 26070b57cec5SDimitry Andric 26080b57cec5SDimitry Andric /*! 26090b57cec5SDimitry Andric @ingroup TASKING 26100b57cec5SDimitry Andric @param gtid Global thread ID 26110b57cec5SDimitry Andric @param num Number of data items to reduce 26120b57cec5SDimitry Andric @param data Array of data for reduction 26130b57cec5SDimitry Andric @return The taskgroup identifier 26140b57cec5SDimitry Andric 26150b57cec5SDimitry Andric Initialize task reduction for the taskgroup. 26160b57cec5SDimitry Andric 26170b57cec5SDimitry Andric Note: this entry supposes the optional compiler-generated initializer routine 26180b57cec5SDimitry Andric has two parameters, pointer to object to be initialized and pointer to omp_orig 26190b57cec5SDimitry Andric */ 26200b57cec5SDimitry Andric void *__kmpc_taskred_init(int gtid, int num, void *data) { 262106c3fb27SDimitry Andric #if OMPX_TASKGRAPH 262206c3fb27SDimitry Andric kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx); 262306c3fb27SDimitry Andric if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) { 262406c3fb27SDimitry Andric kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx]; 262506c3fb27SDimitry Andric this_tdg->rec_taskred_data = 262606c3fb27SDimitry Andric __kmp_allocate(sizeof(kmp_task_red_input_t) * num); 262706c3fb27SDimitry Andric this_tdg->rec_num_taskred = num; 262806c3fb27SDimitry Andric KMP_MEMCPY(this_tdg->rec_taskred_data, data, 262906c3fb27SDimitry Andric sizeof(kmp_task_red_input_t) * num); 263006c3fb27SDimitry Andric } 263106c3fb27SDimitry Andric #endif 26320b57cec5SDimitry Andric return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data); 26330b57cec5SDimitry Andric } 26340b57cec5SDimitry Andric 26350b57cec5SDimitry Andric // Copy task reduction data (except for shared pointers). 
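// (The copy path below is used when several threads must share one reduction
// setup; see __kmp_task_reduction_modifier_init further down.) At the user
// level, this section backs constructs such as the following illustrative
// example:
//
//   int sum = 0;
//   #pragma omp taskgroup task_reduction(+ : sum)
//   {
//     for (int i = 0; i < n; ++i) {
//       #pragma omp task in_reduction(+ : sum)
//       sum += f(i);
//     }
//   }
//
// where the compiler builds a kmp_taskred_input_t per reduction item, calls
// one of the init entry points above, and locates each task's private copy
// via __kmpc_task_reduction_get_th_data below.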
26360b57cec5SDimitry Andric template <typename T> 26370b57cec5SDimitry Andric void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data, 26380b57cec5SDimitry Andric kmp_taskgroup_t *tg, void *reduce_data) { 26390b57cec5SDimitry Andric kmp_taskred_data_t *arr; 26400b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p," 26410b57cec5SDimitry Andric " from data %p\n", 26420b57cec5SDimitry Andric thr, tg, reduce_data)); 26430b57cec5SDimitry Andric arr = (kmp_taskred_data_t *)__kmp_thread_malloc( 26440b57cec5SDimitry Andric thr, num * sizeof(kmp_taskred_data_t)); 26450b57cec5SDimitry Andric // threads will share private copies, thunk routines, sizes, flags, etc.: 26460b57cec5SDimitry Andric KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t)); 26470b57cec5SDimitry Andric for (int i = 0; i < num; ++i) { 26480b57cec5SDimitry Andric arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers 26490b57cec5SDimitry Andric } 26500b57cec5SDimitry Andric tg->reduce_data = (void *)arr; 26510b57cec5SDimitry Andric tg->reduce_num_data = num; 26520b57cec5SDimitry Andric } 26530b57cec5SDimitry Andric 26540b57cec5SDimitry Andric /*! 26550b57cec5SDimitry Andric @ingroup TASKING 26560b57cec5SDimitry Andric @param gtid Global thread ID 26570b57cec5SDimitry Andric @param tskgrp The taskgroup ID (optional) 26580b57cec5SDimitry Andric @param data Shared location of the item 26590b57cec5SDimitry Andric @return The pointer to per-thread data 26600b57cec5SDimitry Andric 26610b57cec5SDimitry Andric Get thread-specific location of data item 26620b57cec5SDimitry Andric */ 26630b57cec5SDimitry Andric void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) { 2664e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 26650b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 26660b57cec5SDimitry Andric kmp_int32 nth = thread->th.th_team_nproc; 26670b57cec5SDimitry Andric if (nth == 1) 26680b57cec5SDimitry Andric return data; // nothing to do 26690b57cec5SDimitry Andric 26700b57cec5SDimitry Andric kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp; 26710b57cec5SDimitry Andric if (tg == NULL) 26720b57cec5SDimitry Andric tg = thread->th.th_current_task->td_taskgroup; 26730b57cec5SDimitry Andric KMP_ASSERT(tg != NULL); 2674*0fca6ea1SDimitry Andric kmp_taskred_data_t *arr; 2675*0fca6ea1SDimitry Andric kmp_int32 num; 26760b57cec5SDimitry Andric kmp_int32 tid = thread->th.th_info.ds.ds_tid; 26770b57cec5SDimitry Andric 267806c3fb27SDimitry Andric #if OMPX_TASKGRAPH 267906c3fb27SDimitry Andric if ((thread->th.th_current_task->is_taskgraph) && 268006c3fb27SDimitry Andric (!__kmp_tdg_is_recording( 268106c3fb27SDimitry Andric __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) { 268206c3fb27SDimitry Andric tg = thread->th.th_current_task->td_taskgroup; 268306c3fb27SDimitry Andric KMP_ASSERT(tg != NULL); 268406c3fb27SDimitry Andric KMP_ASSERT(tg->reduce_data != NULL); 268506c3fb27SDimitry Andric arr = (kmp_taskred_data_t *)(tg->reduce_data); 268606c3fb27SDimitry Andric num = tg->reduce_num_data; 268706c3fb27SDimitry Andric } 268806c3fb27SDimitry Andric #endif 268906c3fb27SDimitry Andric 26900b57cec5SDimitry Andric KMP_ASSERT(data != NULL); 26910b57cec5SDimitry Andric while (tg != NULL) { 2692*0fca6ea1SDimitry Andric arr = (kmp_taskred_data_t *)(tg->reduce_data); 2693*0fca6ea1SDimitry Andric num = tg->reduce_num_data; 26940b57cec5SDimitry Andric for (int i = 0; i < num; ++i) { 26950b57cec5SDimitry Andric if (!arr[i].flags.lazy_priv) { 
26960b57cec5SDimitry Andric if (data == arr[i].reduce_shar || 26970b57cec5SDimitry Andric (data >= arr[i].reduce_priv && data < arr[i].reduce_pend)) 26980b57cec5SDimitry Andric return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size; 26990b57cec5SDimitry Andric } else { 27000b57cec5SDimitry Andric // check shared location first 27010b57cec5SDimitry Andric void **p_priv = (void **)(arr[i].reduce_priv); 27020b57cec5SDimitry Andric if (data == arr[i].reduce_shar) 27030b57cec5SDimitry Andric goto found; 27040b57cec5SDimitry Andric // check if we get some thread specific location as parameter 27050b57cec5SDimitry Andric for (int j = 0; j < nth; ++j) 27060b57cec5SDimitry Andric if (data == p_priv[j]) 27070b57cec5SDimitry Andric goto found; 27080b57cec5SDimitry Andric continue; // not found, continue search 27090b57cec5SDimitry Andric found: 27100b57cec5SDimitry Andric if (p_priv[tid] == NULL) { 27110b57cec5SDimitry Andric // allocate thread specific object lazily 27120b57cec5SDimitry Andric p_priv[tid] = __kmp_allocate(arr[i].reduce_size); 27130b57cec5SDimitry Andric if (arr[i].reduce_init != NULL) { 27140b57cec5SDimitry Andric if (arr[i].reduce_orig != NULL) { // new interface 27150b57cec5SDimitry Andric ((void (*)(void *, void *))arr[i].reduce_init)( 27160b57cec5SDimitry Andric p_priv[tid], arr[i].reduce_orig); 27170b57cec5SDimitry Andric } else { // old interface (single parameter) 27180b57cec5SDimitry Andric ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]); 27190b57cec5SDimitry Andric } 27200b57cec5SDimitry Andric } 27210b57cec5SDimitry Andric } 27220b57cec5SDimitry Andric return p_priv[tid]; 27230b57cec5SDimitry Andric } 27240b57cec5SDimitry Andric } 27255f757f3fSDimitry Andric KMP_ASSERT(tg->parent); 27260b57cec5SDimitry Andric tg = tg->parent; 27270b57cec5SDimitry Andric } 27280b57cec5SDimitry Andric KMP_ASSERT2(0, "Unknown task reduction item"); 27290b57cec5SDimitry Andric return NULL; // ERROR, this line never executed 27300b57cec5SDimitry Andric } 27310b57cec5SDimitry Andric 27320b57cec5SDimitry Andric // Finalize task reduction. 
27330b57cec5SDimitry Andric // Called from __kmpc_end_taskgroup() 27340b57cec5SDimitry Andric static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) { 27350b57cec5SDimitry Andric kmp_int32 nth = th->th.th_team_nproc; 27365f757f3fSDimitry Andric KMP_DEBUG_ASSERT( 27375f757f3fSDimitry Andric nth > 1 || 27385f757f3fSDimitry Andric __kmp_enable_hidden_helper); // should not be called if nth == 1 unless we 27395f757f3fSDimitry Andric // are using hidden helper threads 27400b57cec5SDimitry Andric kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data; 27410b57cec5SDimitry Andric kmp_int32 num = tg->reduce_num_data; 27420b57cec5SDimitry Andric for (int i = 0; i < num; ++i) { 27430b57cec5SDimitry Andric void *sh_data = arr[i].reduce_shar; 27440b57cec5SDimitry Andric void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini); 27450b57cec5SDimitry Andric void (*f_comb)(void *, void *) = 27460b57cec5SDimitry Andric (void (*)(void *, void *))(arr[i].reduce_comb); 27470b57cec5SDimitry Andric if (!arr[i].flags.lazy_priv) { 27480b57cec5SDimitry Andric void *pr_data = arr[i].reduce_priv; 27490b57cec5SDimitry Andric size_t size = arr[i].reduce_size; 27500b57cec5SDimitry Andric for (int j = 0; j < nth; ++j) { 27510b57cec5SDimitry Andric void *priv_data = (char *)pr_data + j * size; 27520b57cec5SDimitry Andric f_comb(sh_data, priv_data); // combine results 27530b57cec5SDimitry Andric if (f_fini) 27540b57cec5SDimitry Andric f_fini(priv_data); // finalize if needed 27550b57cec5SDimitry Andric } 27560b57cec5SDimitry Andric } else { 27570b57cec5SDimitry Andric void **pr_data = (void **)(arr[i].reduce_priv); 27580b57cec5SDimitry Andric for (int j = 0; j < nth; ++j) { 27590b57cec5SDimitry Andric if (pr_data[j] != NULL) { 27600b57cec5SDimitry Andric f_comb(sh_data, pr_data[j]); // combine results 27610b57cec5SDimitry Andric if (f_fini) 27620b57cec5SDimitry Andric f_fini(pr_data[j]); // finalize if needed 27630b57cec5SDimitry Andric __kmp_free(pr_data[j]); 27640b57cec5SDimitry Andric } 27650b57cec5SDimitry Andric } 27660b57cec5SDimitry Andric } 27670b57cec5SDimitry Andric __kmp_free(arr[i].reduce_priv); 27680b57cec5SDimitry Andric } 27690b57cec5SDimitry Andric __kmp_thread_free(th, arr); 27700b57cec5SDimitry Andric tg->reduce_data = NULL; 27710b57cec5SDimitry Andric tg->reduce_num_data = 0; 27720b57cec5SDimitry Andric } 27730b57cec5SDimitry Andric 27740b57cec5SDimitry Andric // Cleanup task reduction data for parallel or worksharing, 27750b57cec5SDimitry Andric // do not touch task private data other threads still working with. 
27760b57cec5SDimitry Andric // Called from __kmpc_end_taskgroup() 27770b57cec5SDimitry Andric static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) { 27780b57cec5SDimitry Andric __kmp_thread_free(th, tg->reduce_data); 27790b57cec5SDimitry Andric tg->reduce_data = NULL; 27800b57cec5SDimitry Andric tg->reduce_num_data = 0; 27810b57cec5SDimitry Andric } 27820b57cec5SDimitry Andric 27830b57cec5SDimitry Andric template <typename T> 27840b57cec5SDimitry Andric void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws, 27850b57cec5SDimitry Andric int num, T *data) { 2786e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 27870b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 27880b57cec5SDimitry Andric kmp_int32 nth = thr->th.th_team_nproc; 27890b57cec5SDimitry Andric __kmpc_taskgroup(loc, gtid); // form new taskgroup first 27900b57cec5SDimitry Andric if (nth == 1) { 27910b57cec5SDimitry Andric KA_TRACE(10, 27920b57cec5SDimitry Andric ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n", 27930b57cec5SDimitry Andric gtid, thr->th.th_current_task->td_taskgroup)); 27940b57cec5SDimitry Andric return (void *)thr->th.th_current_task->td_taskgroup; 27950b57cec5SDimitry Andric } 27960b57cec5SDimitry Andric kmp_team_t *team = thr->th.th_team; 27970b57cec5SDimitry Andric void *reduce_data; 27980b57cec5SDimitry Andric kmp_taskgroup_t *tg; 27990b57cec5SDimitry Andric reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]); 28000b57cec5SDimitry Andric if (reduce_data == NULL && 28010b57cec5SDimitry Andric __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data, 28020b57cec5SDimitry Andric (void *)1)) { 28030b57cec5SDimitry Andric // single thread enters this block to initialize common reduction data 28040b57cec5SDimitry Andric KMP_DEBUG_ASSERT(reduce_data == NULL); 28050b57cec5SDimitry Andric // first initialize own data, then make a copy other threads can use 28060b57cec5SDimitry Andric tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data); 28070b57cec5SDimitry Andric reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t)); 28080b57cec5SDimitry Andric KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t)); 28090b57cec5SDimitry Andric // fini counters should be 0 at this point 28100b57cec5SDimitry Andric KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0); 28110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0); 28120b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data); 28130b57cec5SDimitry Andric } else { 28140b57cec5SDimitry Andric while ( 28150b57cec5SDimitry Andric (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) == 28160b57cec5SDimitry Andric (void *)1) { // wait for task reduction initialization 28170b57cec5SDimitry Andric KMP_CPU_PAUSE(); 28180b57cec5SDimitry Andric } 28190b57cec5SDimitry Andric KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here 28200b57cec5SDimitry Andric tg = thr->th.th_current_task->td_taskgroup; 28210b57cec5SDimitry Andric __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data); 28220b57cec5SDimitry Andric } 28230b57cec5SDimitry Andric return tg; 28240b57cec5SDimitry Andric } 28250b57cec5SDimitry Andric 28260b57cec5SDimitry Andric /*! 
28270b57cec5SDimitry Andric @ingroup TASKING
28280b57cec5SDimitry Andric @param loc Source location info
28290b57cec5SDimitry Andric @param gtid Global thread ID
28300b57cec5SDimitry Andric @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
28310b57cec5SDimitry Andric @param num Number of data items to reduce
28320b57cec5SDimitry Andric @param data Array of data for reduction
28330b57cec5SDimitry Andric @return The taskgroup identifier
28340b57cec5SDimitry Andric
28350b57cec5SDimitry Andric Initialize task reduction for a parallel or worksharing construct.
28360b57cec5SDimitry Andric
28370b57cec5SDimitry Andric Note: this entry assumes the optional compiler-generated initializer routine
28380b57cec5SDimitry Andric takes a single parameter - a pointer to the object to be initialized. That
28390b57cec5SDimitry Andric means the reduction either does not use the omp_orig object, or omp_orig is
28400b57cec5SDimitry Andric accessible without the help of the runtime library.
28410b57cec5SDimitry Andric */
28420b57cec5SDimitry Andric void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
28430b57cec5SDimitry Andric int num, void *data) {
28440b57cec5SDimitry Andric return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
28450b57cec5SDimitry Andric (kmp_task_red_input_t *)data);
28460b57cec5SDimitry Andric }
28470b57cec5SDimitry Andric
28480b57cec5SDimitry Andric /*!
28490b57cec5SDimitry Andric @ingroup TASKING
28500b57cec5SDimitry Andric @param loc Source location info
28510b57cec5SDimitry Andric @param gtid Global thread ID
28520b57cec5SDimitry Andric @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
28530b57cec5SDimitry Andric @param num Number of data items to reduce
28540b57cec5SDimitry Andric @param data Array of data for reduction
28550b57cec5SDimitry Andric @return The taskgroup identifier
28560b57cec5SDimitry Andric
28570b57cec5SDimitry Andric Initialize task reduction for a parallel or worksharing construct.
28580b57cec5SDimitry Andric
28590b57cec5SDimitry Andric Note: this entry assumes the optional compiler-generated initializer routine
28600b57cec5SDimitry Andric takes two parameters: a pointer to the object to be initialized and a pointer to omp_orig.
28610b57cec5SDimitry Andric */
28620b57cec5SDimitry Andric void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
28630b57cec5SDimitry Andric void *data) {
28640b57cec5SDimitry Andric return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
28650b57cec5SDimitry Andric (kmp_taskred_input_t *)data);
28660b57cec5SDimitry Andric }
28670b57cec5SDimitry Andric
28680b57cec5SDimitry Andric /*!
28690b57cec5SDimitry Andric @ingroup TASKING
28700b57cec5SDimitry Andric @param loc Source location info
28710b57cec5SDimitry Andric @param gtid Global thread ID
28720b57cec5SDimitry Andric @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
28730b57cec5SDimitry Andric
28740b57cec5SDimitry Andric Finalize task reduction for a parallel or worksharing construct.
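
An illustrative pairing, for orientation only (the exact compiler codegen may
differ): a worksharing task reduction is lowered roughly as

  // `arr` points to `num` reduction item descriptors (hypothetical variable)
  void *tg = __kmpc_taskred_modifier_init(loc, gtid, 1, num, arr);
  // ... generated tasks locate their private copies via
  //     __kmpc_task_reduction_get_th_data(gtid, tg, shar) ...
  __kmpc_task_reduction_modifier_fini(loc, gtid, 1);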
28750b57cec5SDimitry Andric */ 28760b57cec5SDimitry Andric void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) { 28770b57cec5SDimitry Andric __kmpc_end_taskgroup(loc, gtid); 28780b57cec5SDimitry Andric } 28790b57cec5SDimitry Andric 28800b57cec5SDimitry Andric // __kmpc_taskgroup: Start a new taskgroup 28810b57cec5SDimitry Andric void __kmpc_taskgroup(ident_t *loc, int gtid) { 2882e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 28830b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 28840b57cec5SDimitry Andric kmp_taskdata_t *taskdata = thread->th.th_current_task; 28850b57cec5SDimitry Andric kmp_taskgroup_t *tg_new = 28860b57cec5SDimitry Andric (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t)); 28870b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new)); 28880b57cec5SDimitry Andric KMP_ATOMIC_ST_RLX(&tg_new->count, 0); 28890b57cec5SDimitry Andric KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq); 28900b57cec5SDimitry Andric tg_new->parent = taskdata->td_taskgroup; 28910b57cec5SDimitry Andric tg_new->reduce_data = NULL; 28920b57cec5SDimitry Andric tg_new->reduce_num_data = 0; 2893fe6060f1SDimitry Andric tg_new->gomp_data = NULL; 28940b57cec5SDimitry Andric taskdata->td_taskgroup = tg_new; 28950b57cec5SDimitry Andric 28960b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 28970b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { 28980b57cec5SDimitry Andric void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 28990b57cec5SDimitry Andric if (!codeptr) 29000b57cec5SDimitry Andric codeptr = OMPT_GET_RETURN_ADDRESS(0); 29010b57cec5SDimitry Andric kmp_team_t *team = thread->th.th_team; 29020b57cec5SDimitry Andric ompt_data_t my_task_data = taskdata->ompt_task_info.task_data; 29030b57cec5SDimitry Andric // FIXME: I think this is wrong for lwt! 
29040b57cec5SDimitry Andric ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data; 29050b57cec5SDimitry Andric 29060b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region)( 29070b57cec5SDimitry Andric ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), 29080b57cec5SDimitry Andric &(my_task_data), codeptr); 29090b57cec5SDimitry Andric } 29100b57cec5SDimitry Andric #endif 29110b57cec5SDimitry Andric } 29120b57cec5SDimitry Andric 29130b57cec5SDimitry Andric // __kmpc_end_taskgroup: Wait until all tasks generated by the current task 29140b57cec5SDimitry Andric // and its descendants are complete 29150b57cec5SDimitry Andric void __kmpc_end_taskgroup(ident_t *loc, int gtid) { 2916e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 29170b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 29180b57cec5SDimitry Andric kmp_taskdata_t *taskdata = thread->th.th_current_task; 29190b57cec5SDimitry Andric kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; 29200b57cec5SDimitry Andric int thread_finished = FALSE; 29210b57cec5SDimitry Andric 29220b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29230b57cec5SDimitry Andric kmp_team_t *team; 29240b57cec5SDimitry Andric ompt_data_t my_task_data; 29250b57cec5SDimitry Andric ompt_data_t my_parallel_data; 2926fe6060f1SDimitry Andric void *codeptr = nullptr; 29270b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) { 29280b57cec5SDimitry Andric team = thread->th.th_team; 29290b57cec5SDimitry Andric my_task_data = taskdata->ompt_task_info.task_data; 29300b57cec5SDimitry Andric // FIXME: I think this is wrong for lwt! 29310b57cec5SDimitry Andric my_parallel_data = team->t.ompt_team_info.parallel_data; 29320b57cec5SDimitry Andric codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 29330b57cec5SDimitry Andric if (!codeptr) 29340b57cec5SDimitry Andric codeptr = OMPT_GET_RETURN_ADDRESS(0); 29350b57cec5SDimitry Andric } 29360b57cec5SDimitry Andric #endif 29370b57cec5SDimitry Andric 29380b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc)); 29390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskgroup != NULL); 29400b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); 29410b57cec5SDimitry Andric 29420b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 29430b57cec5SDimitry Andric // mark task as waiting not on a barrier 29440b57cec5SDimitry Andric taskdata->td_taskwait_counter += 1; 29450b57cec5SDimitry Andric taskdata->td_taskwait_ident = loc; 29460b57cec5SDimitry Andric taskdata->td_taskwait_thread = gtid + 1; 29470b57cec5SDimitry Andric #if USE_ITT_BUILD 29480b57cec5SDimitry Andric // For ITT the taskgroup wait is similar to taskwait until we need to 29490b57cec5SDimitry Andric // distinguish them 2950fe6060f1SDimitry Andric void *itt_sync_obj = NULL; 2951fe6060f1SDimitry Andric #if USE_ITT_NOTIFY 2952fe6060f1SDimitry Andric KMP_ITT_TASKWAIT_STARTING(itt_sync_obj); 2953fe6060f1SDimitry Andric #endif /* USE_ITT_NOTIFY */ 29540b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 29550b57cec5SDimitry Andric 29560b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29570b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) { 29580b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( 29590b57cec5SDimitry Andric ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), 29600b57cec5SDimitry Andric &(my_task_data), codeptr); 29610b57cec5SDimitry Andric } 29620b57cec5SDimitry 
Andric #endif 29630b57cec5SDimitry Andric 29640b57cec5SDimitry Andric if (!taskdata->td_flags.team_serial || 29650b57cec5SDimitry Andric (thread->th.th_task_team != NULL && 2966fe6060f1SDimitry Andric (thread->th.th_task_team->tt.tt_found_proxy_tasks || 2967fe6060f1SDimitry Andric thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) { 2968e8d8bef9SDimitry Andric kmp_flag_32<false, false> flag( 2969e8d8bef9SDimitry Andric RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U); 29700b57cec5SDimitry Andric while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) { 29710b57cec5SDimitry Andric flag.execute_tasks(thread, gtid, FALSE, 29720b57cec5SDimitry Andric &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), 29730b57cec5SDimitry Andric __kmp_task_stealing_constraint); 29740b57cec5SDimitry Andric } 29750b57cec5SDimitry Andric } 29760b57cec5SDimitry Andric taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting 29770b57cec5SDimitry Andric 29780b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 29790b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) { 29800b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( 29810b57cec5SDimitry Andric ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), 29820b57cec5SDimitry Andric &(my_task_data), codeptr); 29830b57cec5SDimitry Andric } 29840b57cec5SDimitry Andric #endif 29850b57cec5SDimitry Andric 29860b57cec5SDimitry Andric #if USE_ITT_BUILD 2987fe6060f1SDimitry Andric KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj); 2988e8d8bef9SDimitry Andric KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants 29890b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 29900b57cec5SDimitry Andric } 29910b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskgroup->count == 0); 29920b57cec5SDimitry Andric 2993fe6060f1SDimitry Andric if (taskgroup->reduce_data != NULL && 2994fe6060f1SDimitry Andric !taskgroup->gomp_data) { // need to reduce? 29950b57cec5SDimitry Andric int cnt; 29960b57cec5SDimitry Andric void *reduce_data; 29970b57cec5SDimitry Andric kmp_team_t *t = thread->th.th_team; 29980b57cec5SDimitry Andric kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data; 29990b57cec5SDimitry Andric // check if <priv> data of the first reduction variable shared for the team 30000b57cec5SDimitry Andric void *priv0 = arr[0].reduce_priv; 30010b57cec5SDimitry Andric if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL && 30020b57cec5SDimitry Andric ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) { 30030b57cec5SDimitry Andric // finishing task reduction on parallel 30040b57cec5SDimitry Andric cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]); 30050b57cec5SDimitry Andric if (cnt == thread->th.th_team_nproc - 1) { 30060b57cec5SDimitry Andric // we are the last thread passing __kmpc_reduction_modifier_fini() 30070b57cec5SDimitry Andric // finalize task reduction: 30080b57cec5SDimitry Andric __kmp_task_reduction_fini(thread, taskgroup); 30090b57cec5SDimitry Andric // cleanup fields in the team structure: 30100b57cec5SDimitry Andric // TODO: is relaxed store enough here (whole barrier should follow)? 
30110b57cec5SDimitry Andric __kmp_thread_free(thread, reduce_data); 30120b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL); 30130b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0); 30140b57cec5SDimitry Andric } else { 30150b57cec5SDimitry Andric // we are not the last thread passing __kmpc_reduction_modifier_fini(), 30160b57cec5SDimitry Andric // so do not finalize reduction, just clean own copy of the data 30170b57cec5SDimitry Andric __kmp_task_reduction_clean(thread, taskgroup); 30180b57cec5SDimitry Andric } 30190b57cec5SDimitry Andric } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) != 30200b57cec5SDimitry Andric NULL && 30210b57cec5SDimitry Andric ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) { 30220b57cec5SDimitry Andric // finishing task reduction on worksharing 30230b57cec5SDimitry Andric cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]); 30240b57cec5SDimitry Andric if (cnt == thread->th.th_team_nproc - 1) { 30250b57cec5SDimitry Andric // we are the last thread passing __kmpc_reduction_modifier_fini() 30260b57cec5SDimitry Andric __kmp_task_reduction_fini(thread, taskgroup); 30270b57cec5SDimitry Andric // cleanup fields in team structure: 30280b57cec5SDimitry Andric // TODO: is relaxed store enough here (whole barrier should follow)? 30290b57cec5SDimitry Andric __kmp_thread_free(thread, reduce_data); 30300b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL); 30310b57cec5SDimitry Andric KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0); 30320b57cec5SDimitry Andric } else { 30330b57cec5SDimitry Andric // we are not the last thread passing __kmpc_reduction_modifier_fini(), 30340b57cec5SDimitry Andric // so do not finalize reduction, just clean own copy of the data 30350b57cec5SDimitry Andric __kmp_task_reduction_clean(thread, taskgroup); 30360b57cec5SDimitry Andric } 30370b57cec5SDimitry Andric } else { 30380b57cec5SDimitry Andric // finishing task reduction on taskgroup 30390b57cec5SDimitry Andric __kmp_task_reduction_fini(thread, taskgroup); 30400b57cec5SDimitry Andric } 30410b57cec5SDimitry Andric } 30420b57cec5SDimitry Andric // Restore parent taskgroup for the current task 30430b57cec5SDimitry Andric taskdata->td_taskgroup = taskgroup->parent; 30440b57cec5SDimitry Andric __kmp_thread_free(thread, taskgroup); 30450b57cec5SDimitry Andric 30460b57cec5SDimitry Andric KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", 30470b57cec5SDimitry Andric gtid, taskdata)); 30480b57cec5SDimitry Andric 30490b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 30500b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { 30510b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region)( 30520b57cec5SDimitry Andric ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), 30530b57cec5SDimitry Andric &(my_task_data), codeptr); 30540b57cec5SDimitry Andric } 30550b57cec5SDimitry Andric #endif 30560b57cec5SDimitry Andric } 30570b57cec5SDimitry Andric 305881ad6265SDimitry Andric static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid, 305981ad6265SDimitry Andric kmp_task_team_t *task_team, 306081ad6265SDimitry Andric kmp_int32 is_constrained) { 306181ad6265SDimitry Andric kmp_task_t *task = NULL; 306281ad6265SDimitry Andric kmp_taskdata_t *taskdata; 306381ad6265SDimitry Andric kmp_taskdata_t *current; 306481ad6265SDimitry Andric kmp_thread_data_t *thread_data; 306581ad6265SDimitry Andric int ntasks = 
task_team->tt.tt_num_task_pri; 306681ad6265SDimitry Andric if (ntasks == 0) { 306781ad6265SDimitry Andric KA_TRACE( 306881ad6265SDimitry Andric 20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid)); 306981ad6265SDimitry Andric return NULL; 307081ad6265SDimitry Andric } 307181ad6265SDimitry Andric do { 307281ad6265SDimitry Andric // decrement num_tasks to "reserve" one task to get for execution 307381ad6265SDimitry Andric if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks, 307481ad6265SDimitry Andric ntasks - 1)) 307581ad6265SDimitry Andric break; 307606c3fb27SDimitry Andric ntasks = task_team->tt.tt_num_task_pri; 307781ad6265SDimitry Andric } while (ntasks > 0); 307881ad6265SDimitry Andric if (ntasks == 0) { 307981ad6265SDimitry Andric KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n", 308081ad6265SDimitry Andric __kmp_get_gtid())); 308181ad6265SDimitry Andric return NULL; 308281ad6265SDimitry Andric } 308381ad6265SDimitry Andric // We got a "ticket" to get a "reserved" priority task 308481ad6265SDimitry Andric int deque_ntasks; 308581ad6265SDimitry Andric kmp_task_pri_t *list = task_team->tt.tt_task_pri_list; 308681ad6265SDimitry Andric do { 308781ad6265SDimitry Andric KMP_ASSERT(list != NULL); 308881ad6265SDimitry Andric thread_data = &list->td; 308981ad6265SDimitry Andric __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); 309081ad6265SDimitry Andric deque_ntasks = thread_data->td.td_deque_ntasks; 309181ad6265SDimitry Andric if (deque_ntasks == 0) { 309281ad6265SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); 309381ad6265SDimitry Andric KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n", 309481ad6265SDimitry Andric __kmp_get_gtid(), thread_data)); 309581ad6265SDimitry Andric list = list->next; 309681ad6265SDimitry Andric } 309781ad6265SDimitry Andric } while (deque_ntasks == 0); 309881ad6265SDimitry Andric KMP_DEBUG_ASSERT(deque_ntasks); 309981ad6265SDimitry Andric int target = thread_data->td.td_deque_head; 310081ad6265SDimitry Andric current = __kmp_threads[gtid]->th.th_current_task; 310181ad6265SDimitry Andric taskdata = thread_data->td.td_deque[target]; 310281ad6265SDimitry Andric if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) { 310381ad6265SDimitry Andric // Bump head pointer and Wrap. 
310481ad6265SDimitry Andric thread_data->td.td_deque_head =
310581ad6265SDimitry Andric (target + 1) & TASK_DEQUE_MASK(thread_data->td);
310681ad6265SDimitry Andric } else {
310781ad6265SDimitry Andric if (!task_team->tt.tt_untied_task_encountered) {
310881ad6265SDimitry Andric // The TSC does not allow stealing the victim task
310981ad6265SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
311081ad6265SDimitry Andric KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
311181ad6265SDimitry Andric "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
311281ad6265SDimitry Andric gtid, thread_data, task_team, deque_ntasks, target,
311381ad6265SDimitry Andric thread_data->td.td_deque_tail));
311481ad6265SDimitry Andric task_team->tt.tt_num_task_pri++; // atomic inc, restore value
311581ad6265SDimitry Andric return NULL;
311681ad6265SDimitry Andric }
311781ad6265SDimitry Andric int i;
311881ad6265SDimitry Andric // walk through the deque trying to steal any task
311981ad6265SDimitry Andric taskdata = NULL;
312081ad6265SDimitry Andric for (i = 1; i < deque_ntasks; ++i) {
312181ad6265SDimitry Andric target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
312281ad6265SDimitry Andric taskdata = thread_data->td.td_deque[target];
312381ad6265SDimitry Andric if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
312481ad6265SDimitry Andric break; // found task to execute
312581ad6265SDimitry Andric } else {
312681ad6265SDimitry Andric taskdata = NULL;
312781ad6265SDimitry Andric }
312881ad6265SDimitry Andric }
312981ad6265SDimitry Andric if (taskdata == NULL) {
313081ad6265SDimitry Andric // No appropriate candidate found to execute
313181ad6265SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
313281ad6265SDimitry Andric KA_TRACE(
313381ad6265SDimitry Andric 10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
313481ad6265SDimitry Andric "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
313581ad6265SDimitry Andric gtid, thread_data, task_team, deque_ntasks,
313681ad6265SDimitry Andric thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
313781ad6265SDimitry Andric task_team->tt.tt_num_task_pri++; // atomic inc, restore value
313881ad6265SDimitry Andric return NULL;
313981ad6265SDimitry Andric }
314081ad6265SDimitry Andric int prev = target;
314181ad6265SDimitry Andric for (i = i + 1; i < deque_ntasks; ++i) {
314281ad6265SDimitry Andric // shift remaining tasks in the deque left by 1
314381ad6265SDimitry Andric target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
314481ad6265SDimitry Andric thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
314581ad6265SDimitry Andric prev = target;
314681ad6265SDimitry Andric }
314781ad6265SDimitry Andric KMP_DEBUG_ASSERT(
314881ad6265SDimitry Andric thread_data->td.td_deque_tail ==
314981ad6265SDimitry Andric (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
315081ad6265SDimitry Andric thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped)
315181ad6265SDimitry Andric }
315281ad6265SDimitry Andric thread_data->td.td_deque_ntasks = deque_ntasks - 1;
315381ad6265SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
315481ad6265SDimitry Andric task = KMP_TASKDATA_TO_TASK(taskdata);
315581ad6265SDimitry Andric return task;
315681ad6265SDimitry Andric }
315781ad6265SDimitry Andric
31580b57cec5SDimitry Andric // __kmp_remove_my_task: remove a task from my own deque
31590b57cec5SDimitry Andric
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid, 31600b57cec5SDimitry Andric kmp_task_team_t *task_team, 31610b57cec5SDimitry Andric kmp_int32 is_constrained) { 31620b57cec5SDimitry Andric kmp_task_t *task; 31630b57cec5SDimitry Andric kmp_taskdata_t *taskdata; 31640b57cec5SDimitry Andric kmp_thread_data_t *thread_data; 31650b57cec5SDimitry Andric kmp_uint32 tail; 31660b57cec5SDimitry Andric 31670b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); 31680b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data != 31690b57cec5SDimitry Andric NULL); // Caller should check this condition 31700b57cec5SDimitry Andric 31710b57cec5SDimitry Andric thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; 31720b57cec5SDimitry Andric 31730b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", 31740b57cec5SDimitry Andric gtid, thread_data->td.td_deque_ntasks, 31750b57cec5SDimitry Andric thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); 31760b57cec5SDimitry Andric 31770b57cec5SDimitry Andric if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { 31780b57cec5SDimitry Andric KA_TRACE(10, 31790b57cec5SDimitry Andric ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " 31800b57cec5SDimitry Andric "ntasks=%d head=%u tail=%u\n", 31810b57cec5SDimitry Andric gtid, thread_data->td.td_deque_ntasks, 31820b57cec5SDimitry Andric thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); 31830b57cec5SDimitry Andric return NULL; 31840b57cec5SDimitry Andric } 31850b57cec5SDimitry Andric 31860b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); 31870b57cec5SDimitry Andric 31880b57cec5SDimitry Andric if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { 31890b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); 31900b57cec5SDimitry Andric KA_TRACE(10, 31910b57cec5SDimitry Andric ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " 31920b57cec5SDimitry Andric "ntasks=%d head=%u tail=%u\n", 31930b57cec5SDimitry Andric gtid, thread_data->td.td_deque_ntasks, 31940b57cec5SDimitry Andric thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); 31950b57cec5SDimitry Andric return NULL; 31960b57cec5SDimitry Andric } 31970b57cec5SDimitry Andric 31980b57cec5SDimitry Andric tail = (thread_data->td.td_deque_tail - 1) & 31990b57cec5SDimitry Andric TASK_DEQUE_MASK(thread_data->td); // Wrap index. 
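// Illustrative note, not part of the runtime logic: td_deque_size is kept a
// power of two, so TASK_DEQUE_MASK(td) == size - 1 and the bitwise AND above
// wraps the index without a branch or modulo. Assuming a deque size of 256,
// (0 - 1) & 255 == 255, so decrementing past slot 0 wraps to the last slot.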
32000b57cec5SDimitry Andric taskdata = thread_data->td.td_deque[tail];
32010b57cec5SDimitry Andric
32020b57cec5SDimitry Andric if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
32030b57cec5SDimitry Andric thread->th.th_current_task)) {
32040b57cec5SDimitry Andric // The TSC does not allow stealing the victim task
32050b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
32060b57cec5SDimitry Andric KA_TRACE(10,
32070b57cec5SDimitry Andric ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
32080b57cec5SDimitry Andric "ntasks=%d head=%u tail=%u\n",
32090b57cec5SDimitry Andric gtid, thread_data->td.td_deque_ntasks,
32100b57cec5SDimitry Andric thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
32110b57cec5SDimitry Andric return NULL;
32120b57cec5SDimitry Andric }
32130b57cec5SDimitry Andric
32140b57cec5SDimitry Andric thread_data->td.td_deque_tail = tail;
32150b57cec5SDimitry Andric TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
32160b57cec5SDimitry Andric
32170b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
32180b57cec5SDimitry Andric
32190b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
32200b57cec5SDimitry Andric "ntasks=%d head=%u tail=%u\n",
32210b57cec5SDimitry Andric gtid, taskdata, thread_data->td.td_deque_ntasks,
32220b57cec5SDimitry Andric thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
32230b57cec5SDimitry Andric
32240b57cec5SDimitry Andric task = KMP_TASKDATA_TO_TASK(taskdata);
32250b57cec5SDimitry Andric return task;
32260b57cec5SDimitry Andric }
32270b57cec5SDimitry Andric
32280b57cec5SDimitry Andric // __kmp_steal_task: remove a task from another thread's deque
32290b57cec5SDimitry Andric // Assumes that the calling thread has already checked for the existence of
32300b57cec5SDimitry Andric // the task_team thread_data before calling this routine.
3231*0fca6ea1SDimitry Andric static kmp_task_t *__kmp_steal_task(kmp_int32 victim_tid, kmp_int32 gtid,
32320b57cec5SDimitry Andric kmp_task_team_t *task_team,
32330b57cec5SDimitry Andric std::atomic<kmp_int32> *unfinished_threads,
32340b57cec5SDimitry Andric int *thread_finished,
32350b57cec5SDimitry Andric kmp_int32 is_constrained) {
32360b57cec5SDimitry Andric kmp_task_t *task;
32370b57cec5SDimitry Andric kmp_taskdata_t *taskdata;
32380b57cec5SDimitry Andric kmp_taskdata_t *current;
32390b57cec5SDimitry Andric kmp_thread_data_t *victim_td, *threads_data;
32400b57cec5SDimitry Andric kmp_int32 target;
3241*0fca6ea1SDimitry Andric kmp_info_t *victim_thr;
32420b57cec5SDimitry Andric
32430b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
32440b57cec5SDimitry Andric
32450b57cec5SDimitry Andric threads_data = task_team->tt.tt_threads_data;
32460b57cec5SDimitry Andric KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
3247*0fca6ea1SDimitry Andric KMP_DEBUG_ASSERT(victim_tid >= 0);
3248*0fca6ea1SDimitry Andric KMP_DEBUG_ASSERT(victim_tid < task_team->tt.tt_nproc);
32490b57cec5SDimitry Andric
32500b57cec5SDimitry Andric victim_td = &threads_data[victim_tid];
3251*0fca6ea1SDimitry Andric victim_thr = victim_td->td.td_thr;
3252*0fca6ea1SDimitry Andric (void)victim_thr; // Used in TRACE messages which aren't always enabled.
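// Shape of the stealing protocol below (descriptive summary): the victim's
// task count is read once without the lock as a cheap early-out, then
// re-checked under td_deque_lock before any deque slot is touched. A steal
// normally takes the task at the head; under the task scheduling constraint
// the deque may instead be scanned, and if a legal task is found in the
// middle, the remaining entries are shifted left by one so the deque stays
// contiguous.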
32530b57cec5SDimitry Andric 32540b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: " 32550b57cec5SDimitry Andric "task_team=%p ntasks=%d head=%u tail=%u\n", 32560b57cec5SDimitry Andric gtid, __kmp_gtid_from_thread(victim_thr), task_team, 32570b57cec5SDimitry Andric victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, 32580b57cec5SDimitry Andric victim_td->td.td_deque_tail)); 32590b57cec5SDimitry Andric 32600b57cec5SDimitry Andric if (TCR_4(victim_td->td.td_deque_ntasks) == 0) { 32610b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: " 32620b57cec5SDimitry Andric "task_team=%p ntasks=%d head=%u tail=%u\n", 32630b57cec5SDimitry Andric gtid, __kmp_gtid_from_thread(victim_thr), task_team, 32640b57cec5SDimitry Andric victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, 32650b57cec5SDimitry Andric victim_td->td.td_deque_tail)); 32660b57cec5SDimitry Andric return NULL; 32670b57cec5SDimitry Andric } 32680b57cec5SDimitry Andric 32690b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock); 32700b57cec5SDimitry Andric 32710b57cec5SDimitry Andric int ntasks = TCR_4(victim_td->td.td_deque_ntasks); 32720b57cec5SDimitry Andric // Check again after we acquire the lock 32730b57cec5SDimitry Andric if (ntasks == 0) { 32740b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock); 32750b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: " 32760b57cec5SDimitry Andric "task_team=%p ntasks=%d head=%u tail=%u\n", 32770b57cec5SDimitry Andric gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks, 32780b57cec5SDimitry Andric victim_td->td.td_deque_head, victim_td->td.td_deque_tail)); 32790b57cec5SDimitry Andric return NULL; 32800b57cec5SDimitry Andric } 32810b57cec5SDimitry Andric 32820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL); 32830b57cec5SDimitry Andric current = __kmp_threads[gtid]->th.th_current_task; 32840b57cec5SDimitry Andric taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head]; 32850b57cec5SDimitry Andric if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) { 32860b57cec5SDimitry Andric // Bump head pointer and Wrap. 
32870b57cec5SDimitry Andric victim_td->td.td_deque_head =
32880b57cec5SDimitry Andric (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
32890b57cec5SDimitry Andric } else {
32900b57cec5SDimitry Andric if (!task_team->tt.tt_untied_task_encountered) {
32910b57cec5SDimitry Andric // The TSC does not allow stealing the victim task
32920b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
32930b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
32940b57cec5SDimitry Andric "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
32950b57cec5SDimitry Andric gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
32960b57cec5SDimitry Andric victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
32970b57cec5SDimitry Andric return NULL;
32980b57cec5SDimitry Andric }
32990b57cec5SDimitry Andric int i;
33000b57cec5SDimitry Andric // walk through victim's deque trying to steal any task
33010b57cec5SDimitry Andric target = victim_td->td.td_deque_head;
33020b57cec5SDimitry Andric taskdata = NULL;
33030b57cec5SDimitry Andric for (i = 1; i < ntasks; ++i) {
33040b57cec5SDimitry Andric target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
33050b57cec5SDimitry Andric taskdata = victim_td->td.td_deque[target];
33060b57cec5SDimitry Andric if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
33070b57cec5SDimitry Andric break; // found victim task
33080b57cec5SDimitry Andric } else {
33090b57cec5SDimitry Andric taskdata = NULL;
33100b57cec5SDimitry Andric }
33110b57cec5SDimitry Andric }
33120b57cec5SDimitry Andric if (taskdata == NULL) {
33130b57cec5SDimitry Andric // No appropriate candidate to steal found
33140b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
33150b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
33160b57cec5SDimitry Andric "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
33170b57cec5SDimitry Andric gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
33180b57cec5SDimitry Andric victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
33190b57cec5SDimitry Andric return NULL;
33200b57cec5SDimitry Andric }
33210b57cec5SDimitry Andric int prev = target;
33220b57cec5SDimitry Andric for (i = i + 1; i < ntasks; ++i) {
33230b57cec5SDimitry Andric // shift remaining tasks in the deque left by 1
33240b57cec5SDimitry Andric target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
33250b57cec5SDimitry Andric victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
33260b57cec5SDimitry Andric prev = target;
33270b57cec5SDimitry Andric }
33280b57cec5SDimitry Andric KMP_DEBUG_ASSERT(
33290b57cec5SDimitry Andric victim_td->td.td_deque_tail ==
33300b57cec5SDimitry Andric (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
33310b57cec5SDimitry Andric victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
33320b57cec5SDimitry Andric }
33330b57cec5SDimitry Andric if (*thread_finished) {
33340b57cec5SDimitry Andric // We need to un-mark this victim as a finished victim. This must be done
33350b57cec5SDimitry Andric // before releasing the lock, or else other threads (starting with the
3336fe6060f1SDimitry Andric // primary thread victim) might be prematurely released from the barrier!!!
3337349cc55cSDimitry Andric #if KMP_DEBUG
3338349cc55cSDimitry Andric kmp_int32 count =
3339349cc55cSDimitry Andric #endif
3340349cc55cSDimitry Andric KMP_ATOMIC_INC(unfinished_threads);
33410b57cec5SDimitry Andric KA_TRACE(
33420b57cec5SDimitry Andric 20,
33430b57cec5SDimitry Andric ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
33440b57cec5SDimitry Andric gtid, count + 1, task_team));
33450b57cec5SDimitry Andric *thread_finished = FALSE;
33460b57cec5SDimitry Andric }
33470b57cec5SDimitry Andric TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
33480b57cec5SDimitry Andric
33490b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
33500b57cec5SDimitry Andric
33510b57cec5SDimitry Andric KMP_COUNT_BLOCK(TASK_stolen);
33520b57cec5SDimitry Andric KA_TRACE(10,
33530b57cec5SDimitry Andric ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
33540b57cec5SDimitry Andric "task_team=%p ntasks=%d head=%u tail=%u\n",
33550b57cec5SDimitry Andric gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
33560b57cec5SDimitry Andric ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
33570b57cec5SDimitry Andric
33580b57cec5SDimitry Andric task = KMP_TASKDATA_TO_TASK(taskdata);
33590b57cec5SDimitry Andric return task;
33600b57cec5SDimitry Andric }
33610b57cec5SDimitry Andric
33620b57cec5SDimitry Andric // __kmp_execute_tasks_template: Choose and execute tasks until either the
33630b57cec5SDimitry Andric // condition is satisfied (return true) or there are none left (return false).
33640b57cec5SDimitry Andric //
33650b57cec5SDimitry Andric // final_spin is TRUE if this is the spin at the release barrier.
33660b57cec5SDimitry Andric // thread_finished indicates whether the thread is finished executing all
33670b57cec5SDimitry Andric // the tasks it has on its deque, and is at the release barrier.
33680b57cec5SDimitry Andric // spinner is the location on which to spin.
33690b57cec5SDimitry Andric // spinner == NULL means only execute a single task and return.
33700b57cec5SDimitry Andric // checker is the value to check to terminate the spin.
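// In outline, each iteration of the search loop below tries, in order: the
// shared priority-task list (__kmp_get_priority_task), the thread's own deque
// (__kmp_remove_my_task), and finally a steal from another thread's deque
// (__kmp_steal_task); see those routines above for the individual protocols.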
33710b57cec5SDimitry Andric template <class C> 33720b57cec5SDimitry Andric static inline int __kmp_execute_tasks_template( 33730b57cec5SDimitry Andric kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, 33740b57cec5SDimitry Andric int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), 33750b57cec5SDimitry Andric kmp_int32 is_constrained) { 33760b57cec5SDimitry Andric kmp_task_team_t *task_team = thread->th.th_task_team; 33770b57cec5SDimitry Andric kmp_thread_data_t *threads_data; 33780b57cec5SDimitry Andric kmp_task_t *task; 33790b57cec5SDimitry Andric kmp_info_t *other_thread; 33800b57cec5SDimitry Andric kmp_taskdata_t *current_task = thread->th.th_current_task; 33810b57cec5SDimitry Andric std::atomic<kmp_int32> *unfinished_threads; 33820b57cec5SDimitry Andric kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0, 33830b57cec5SDimitry Andric tid = thread->th.th_info.ds.ds_tid; 33840b57cec5SDimitry Andric 33850b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); 33860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]); 33870b57cec5SDimitry Andric 33880b57cec5SDimitry Andric if (task_team == NULL || current_task == NULL) 33890b57cec5SDimitry Andric return FALSE; 33900b57cec5SDimitry Andric 33910b57cec5SDimitry Andric KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d " 33920b57cec5SDimitry Andric "*thread_finished=%d\n", 33930b57cec5SDimitry Andric gtid, final_spin, *thread_finished)); 33940b57cec5SDimitry Andric 33950b57cec5SDimitry Andric thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 33960b57cec5SDimitry Andric threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data); 3397e8d8bef9SDimitry Andric 33980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(threads_data != NULL); 33990b57cec5SDimitry Andric 34000b57cec5SDimitry Andric nthreads = task_team->tt.tt_nproc; 34010b57cec5SDimitry Andric unfinished_threads = &(task_team->tt.tt_unfinished_threads); 34020b57cec5SDimitry Andric KMP_DEBUG_ASSERT(*unfinished_threads >= 0); 34030b57cec5SDimitry Andric 34040b57cec5SDimitry Andric while (1) { // Outer loop keeps trying to find tasks in case of single thread 34050b57cec5SDimitry Andric // getting tasks from target constructs 34060b57cec5SDimitry Andric while (1) { // Inner loop to find a task and execute it 34070b57cec5SDimitry Andric task = NULL; 340881ad6265SDimitry Andric if (task_team->tt.tt_num_task_pri) { // get priority task first 340981ad6265SDimitry Andric task = __kmp_get_priority_task(gtid, task_team, is_constrained); 341081ad6265SDimitry Andric } 341181ad6265SDimitry Andric if (task == NULL && use_own_tasks) { // check own queue next 34120b57cec5SDimitry Andric task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained); 34130b57cec5SDimitry Andric } 341481ad6265SDimitry Andric if ((task == NULL) && (nthreads > 1)) { // Steal a task finally 34150b57cec5SDimitry Andric int asleep = 1; 34160b57cec5SDimitry Andric use_own_tasks = 0; 34170b57cec5SDimitry Andric // Try to steal from the last place I stole from successfully. 
34180b57cec5SDimitry Andric if (victim_tid == -2) { // haven't stolen anything yet
34190b57cec5SDimitry Andric victim_tid = threads_data[tid].td.td_deque_last_stolen;
34200b57cec5SDimitry Andric if (victim_tid !=
34210b57cec5SDimitry Andric -1) // if we have a last-stolen-from victim, get the thread
34220b57cec5SDimitry Andric other_thread = threads_data[victim_tid].td.td_thr;
34230b57cec5SDimitry Andric }
34240b57cec5SDimitry Andric if (victim_tid != -1) { // found last victim
34250b57cec5SDimitry Andric asleep = 0;
34260b57cec5SDimitry Andric } else if (!new_victim) { // no recent steals and we haven't already
34270b57cec5SDimitry Andric // used a new victim; select a random thread
34280b57cec5SDimitry Andric do { // Find a different thread to steal work from.
34290b57cec5SDimitry Andric // Pick a random thread. Initial plan was to cycle through all the
34300b57cec5SDimitry Andric // threads, and only return if we tried to steal from every thread,
34310b57cec5SDimitry Andric // and failed. Arch says that's not such a great idea.
34320b57cec5SDimitry Andric victim_tid = __kmp_get_random(thread) % (nthreads - 1);
34330b57cec5SDimitry Andric if (victim_tid >= tid) {
34340b57cec5SDimitry Andric ++victim_tid; // Adjusts random distribution to exclude self
34350b57cec5SDimitry Andric }
34360b57cec5SDimitry Andric // Found a potential victim
34370b57cec5SDimitry Andric other_thread = threads_data[victim_tid].td.td_thr;
34380b57cec5SDimitry Andric // There is a slight chance that __kmp_enable_tasking() did not wake
34390b57cec5SDimitry Andric // up all threads waiting at the barrier. If the victim is sleeping,
34400b57cec5SDimitry Andric // then wake it up. Since we were going to pay the cache miss
34410b57cec5SDimitry Andric // penalty for referencing another thread's kmp_info_t struct anyway,
34420b57cec5SDimitry Andric // the check shouldn't cost too much performance at this point. In
34430b57cec5SDimitry Andric // extra barrier mode, tasks do not sleep at the separate tasking
34440b57cec5SDimitry Andric // barrier, so this isn't a problem.
34450b57cec5SDimitry Andric
34460b57cec5SDimitry Andric asleep = 0;
34470b57cec5SDimitry Andric if ((__kmp_tasking_mode == tskm_task_teams) &&
34480b57cec5SDimitry Andric (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
34490b57cec5SDimitry Andric (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
34500b57cec5SDimitry Andric NULL)) {
34510b57cec5SDimitry Andric asleep = 1;
3452349cc55cSDimitry Andric __kmp_null_resume_wrapper(other_thread);
34530b57cec5SDimitry Andric // A sleeping thread should not have any tasks on its queue.
34540b57cec5SDimitry Andric // There is a slight possibility that it resumes, steals a task
34550b57cec5SDimitry Andric // from another thread, which spawns more tasks, all in the time
34560b57cec5SDimitry Andric // that it takes this thread to check => don't write an assertion
34570b57cec5SDimitry Andric // that the victim's queue is empty. Try stealing from a
34580b57cec5SDimitry Andric // different thread.
34590b57cec5SDimitry Andric }
34600b57cec5SDimitry Andric } while (asleep);
34610b57cec5SDimitry Andric }
34620b57cec5SDimitry Andric
34630b57cec5SDimitry Andric if (!asleep) {
34640b57cec5SDimitry Andric // We have a victim to try to steal from
3465*0fca6ea1SDimitry Andric task =
3466*0fca6ea1SDimitry Andric __kmp_steal_task(victim_tid, gtid, task_team, unfinished_threads,
3467*0fca6ea1SDimitry Andric thread_finished, is_constrained);
34680b57cec5SDimitry Andric }
34690b57cec5SDimitry Andric if (task != NULL) { // set last stolen to victim
34700b57cec5SDimitry Andric if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
34710b57cec5SDimitry Andric threads_data[tid].td.td_deque_last_stolen = victim_tid;
34720b57cec5SDimitry Andric // The pre-refactored code did not try more than 1 successful new
34730b57cec5SDimitry Andric // victim, unless the last one generated more local tasks;
34740b57cec5SDimitry Andric // new_victim keeps track of this
34750b57cec5SDimitry Andric new_victim = 1;
34760b57cec5SDimitry Andric }
34770b57cec5SDimitry Andric } else { // No tasks found; unset last_stolen
34780b57cec5SDimitry Andric KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
34790b57cec5SDimitry Andric victim_tid = -2; // no successful victim found
34800b57cec5SDimitry Andric }
34810b57cec5SDimitry Andric }
34820b57cec5SDimitry Andric
3483e8d8bef9SDimitry Andric if (task == NULL)
3484e8d8bef9SDimitry Andric break; // break out of tasking loop
34850b57cec5SDimitry Andric
34860b57cec5SDimitry Andric // Found a task; execute it
34870b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
34880b57cec5SDimitry Andric if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
34890b57cec5SDimitry Andric if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
34900b57cec5SDimitry Andric // get the object reliably
34910b57cec5SDimitry Andric itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
34920b57cec5SDimitry Andric }
34930b57cec5SDimitry Andric __kmp_itt_task_starting(itt_sync_obj);
34940b57cec5SDimitry Andric }
34950b57cec5SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
34960b57cec5SDimitry Andric __kmp_invoke_task(gtid, task, current_task);
34970b57cec5SDimitry Andric #if USE_ITT_BUILD
34980b57cec5SDimitry Andric if (itt_sync_obj != NULL)
34990b57cec5SDimitry Andric __kmp_itt_task_finished(itt_sync_obj);
35000b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
35010b57cec5SDimitry Andric // If this thread is only partway through the barrier and the condition is
35020b57cec5SDimitry Andric // met, then return now, so that the barrier gather/release pattern can
35030b57cec5SDimitry Andric // proceed. If this thread is in the last spin loop in the barrier,
35040b57cec5SDimitry Andric // waiting to be released, we know that the termination condition will not
35055ffd83dbSDimitry Andric // be satisfied, so don't waste any cycles checking it.
35060b57cec5SDimitry Andric if (flag == NULL || (!final_spin && flag->done_check())) { 35070b57cec5SDimitry Andric KA_TRACE( 35080b57cec5SDimitry Andric 15, 35090b57cec5SDimitry Andric ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", 35100b57cec5SDimitry Andric gtid)); 35110b57cec5SDimitry Andric return TRUE; 35120b57cec5SDimitry Andric } 35130b57cec5SDimitry Andric if (thread->th.th_task_team == NULL) { 35140b57cec5SDimitry Andric break; 35150b57cec5SDimitry Andric } 35160b57cec5SDimitry Andric KMP_YIELD(__kmp_library == library_throughput); // Yield before next task 35170b57cec5SDimitry Andric // If execution of a stolen task results in more tasks being placed on our 35180b57cec5SDimitry Andric // run queue, reset use_own_tasks 35190b57cec5SDimitry Andric if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) { 35200b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned " 35210b57cec5SDimitry Andric "other tasks, restart\n", 35220b57cec5SDimitry Andric gtid)); 35230b57cec5SDimitry Andric use_own_tasks = 1; 35240b57cec5SDimitry Andric new_victim = 0; 35250b57cec5SDimitry Andric } 35260b57cec5SDimitry Andric } 35270b57cec5SDimitry Andric 35280b57cec5SDimitry Andric // The task source has been exhausted. If in final spin loop of barrier, 35290b57cec5SDimitry Andric // check if termination condition is satisfied. The work queue may be empty 35300b57cec5SDimitry Andric // but there might be proxy tasks still executing. 35310b57cec5SDimitry Andric if (final_spin && 35320b57cec5SDimitry Andric KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) { 35330b57cec5SDimitry Andric // First, decrement the #unfinished threads, if that has not already been 35340b57cec5SDimitry Andric // done. This decrement might be to the spin location, and result in the 35350b57cec5SDimitry Andric // termination condition being satisfied. 35360b57cec5SDimitry Andric if (!*thread_finished) { 3537349cc55cSDimitry Andric #if KMP_DEBUG 3538349cc55cSDimitry Andric kmp_int32 count = -1 + 3539349cc55cSDimitry Andric #endif 3540349cc55cSDimitry Andric KMP_ATOMIC_DEC(unfinished_threads); 35410b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec " 35420b57cec5SDimitry Andric "unfinished_threads to %d task_team=%p\n", 35430b57cec5SDimitry Andric gtid, count, task_team)); 35440b57cec5SDimitry Andric *thread_finished = TRUE; 35450b57cec5SDimitry Andric } 35460b57cec5SDimitry Andric 35470b57cec5SDimitry Andric // It is now unsafe to reference thread->th.th_team !!! 3548fe6060f1SDimitry Andric // Decrementing task_team->tt.tt_unfinished_threads can allow the primary 35490b57cec5SDimitry Andric // thread to pass through the barrier, where it might reset each thread's 35500b57cec5SDimitry Andric // th.th_team field for the next parallel region. If we can steal more 35510b57cec5SDimitry Andric // work, we know that this has not happened yet. 
35520b57cec5SDimitry Andric if (flag != NULL && flag->done_check()) {
35530b57cec5SDimitry Andric KA_TRACE(
35540b57cec5SDimitry Andric 15,
35550b57cec5SDimitry Andric ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
35560b57cec5SDimitry Andric gtid));
35570b57cec5SDimitry Andric return TRUE;
35580b57cec5SDimitry Andric }
35590b57cec5SDimitry Andric }
35600b57cec5SDimitry Andric
3561fe6060f1SDimitry Andric // If this thread's task team is NULL, the primary thread has recognized
3562fe6060f1SDimitry Andric // that there are no more tasks; bail out
35630b57cec5SDimitry Andric if (thread->th.th_task_team == NULL) {
35640b57cec5SDimitry Andric KA_TRACE(15,
35650b57cec5SDimitry Andric ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
35660b57cec5SDimitry Andric return FALSE;
35670b57cec5SDimitry Andric }
35680b57cec5SDimitry Andric
356904eeddc0SDimitry Andric // Check the flag again to see if the condition has already been satisfied, to
357004eeddc0SDimitry Andric // avoid being trapped in an infinite loop when an if0 task depends on a hidden
357104eeddc0SDimitry Andric // helper task outside any parallel region. Detached tasks are not impacted in
357204eeddc0SDimitry Andric // this case because the only thread executing this function has to execute the
357304eeddc0SDimitry Andric // proxy task, so it is in another code path that has the same check.
357404eeddc0SDimitry Andric if (flag == NULL || (!final_spin && flag->done_check())) {
357504eeddc0SDimitry Andric KA_TRACE(15,
357604eeddc0SDimitry Andric ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
357704eeddc0SDimitry Andric gtid));
357804eeddc0SDimitry Andric return TRUE;
357904eeddc0SDimitry Andric }
358004eeddc0SDimitry Andric
35810b57cec5SDimitry Andric // We could be getting tasks from target constructs; if this is the only
35820b57cec5SDimitry Andric // thread, keep trying to execute tasks from own queue
3583e8d8bef9SDimitry Andric if (nthreads == 1 &&
3584e8d8bef9SDimitry Andric KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
35850b57cec5SDimitry Andric use_own_tasks = 1;
35860b57cec5SDimitry Andric else {
35870b57cec5SDimitry Andric KA_TRACE(15,
35880b57cec5SDimitry Andric ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
35890b57cec5SDimitry Andric return FALSE;
35900b57cec5SDimitry Andric }
35910b57cec5SDimitry Andric }
35920b57cec5SDimitry Andric }
35930b57cec5SDimitry Andric
3594e8d8bef9SDimitry Andric template <bool C, bool S>
35950b57cec5SDimitry Andric int __kmp_execute_tasks_32(
3596e8d8bef9SDimitry Andric kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
35970b57cec5SDimitry Andric int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
35980b57cec5SDimitry Andric kmp_int32 is_constrained) {
35990b57cec5SDimitry Andric return __kmp_execute_tasks_template(
36000b57cec5SDimitry Andric thread, gtid, flag, final_spin,
36010b57cec5SDimitry Andric thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
36020b57cec5SDimitry Andric }
36030b57cec5SDimitry Andric
3604e8d8bef9SDimitry Andric template <bool C, bool S>
36050b57cec5SDimitry Andric int __kmp_execute_tasks_64(
3606e8d8bef9SDimitry Andric kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
36070b57cec5SDimitry Andric int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
36080b57cec5SDimitry Andric kmp_int32 is_constrained) {
36090b57cec5SDimitry Andric return __kmp_execute_tasks_template(
36100b57cec5SDimitry Andric thread, gtid, flag, final_spin,
36110b57cec5SDimitry Andric thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); 36120b57cec5SDimitry Andric } 36130b57cec5SDimitry Andric 3614349cc55cSDimitry Andric template <bool C, bool S> 3615349cc55cSDimitry Andric int __kmp_atomic_execute_tasks_64( 3616349cc55cSDimitry Andric kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag, 3617349cc55cSDimitry Andric int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), 3618349cc55cSDimitry Andric kmp_int32 is_constrained) { 3619349cc55cSDimitry Andric return __kmp_execute_tasks_template( 3620349cc55cSDimitry Andric thread, gtid, flag, final_spin, 3621349cc55cSDimitry Andric thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); 3622349cc55cSDimitry Andric } 3623349cc55cSDimitry Andric 36240b57cec5SDimitry Andric int __kmp_execute_tasks_oncore( 36250b57cec5SDimitry Andric kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, 36260b57cec5SDimitry Andric int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), 36270b57cec5SDimitry Andric kmp_int32 is_constrained) { 36280b57cec5SDimitry Andric return __kmp_execute_tasks_template( 36290b57cec5SDimitry Andric thread, gtid, flag, final_spin, 36300b57cec5SDimitry Andric thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); 36310b57cec5SDimitry Andric } 36320b57cec5SDimitry Andric 3633e8d8bef9SDimitry Andric template int 3634e8d8bef9SDimitry Andric __kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32, 3635e8d8bef9SDimitry Andric kmp_flag_32<false, false> *, int, 3636e8d8bef9SDimitry Andric int *USE_ITT_BUILD_ARG(void *), kmp_int32); 3637e8d8bef9SDimitry Andric 3638e8d8bef9SDimitry Andric template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32, 3639e8d8bef9SDimitry Andric kmp_flag_64<false, true> *, 3640e8d8bef9SDimitry Andric int, 3641e8d8bef9SDimitry Andric int *USE_ITT_BUILD_ARG(void *), 3642e8d8bef9SDimitry Andric kmp_int32); 3643e8d8bef9SDimitry Andric 3644e8d8bef9SDimitry Andric template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32, 3645e8d8bef9SDimitry Andric kmp_flag_64<true, false> *, 3646e8d8bef9SDimitry Andric int, 3647e8d8bef9SDimitry Andric int *USE_ITT_BUILD_ARG(void *), 3648e8d8bef9SDimitry Andric kmp_int32); 3649e8d8bef9SDimitry Andric 3650349cc55cSDimitry Andric template int __kmp_atomic_execute_tasks_64<false, true>( 3651349cc55cSDimitry Andric kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int, 3652349cc55cSDimitry Andric int *USE_ITT_BUILD_ARG(void *), kmp_int32); 3653349cc55cSDimitry Andric 3654349cc55cSDimitry Andric template int __kmp_atomic_execute_tasks_64<true, false>( 3655349cc55cSDimitry Andric kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int, 3656349cc55cSDimitry Andric int *USE_ITT_BUILD_ARG(void *), kmp_int32); 3657349cc55cSDimitry Andric 36580b57cec5SDimitry Andric // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the 36590b57cec5SDimitry Andric // next barrier so they can assist in executing enqueued tasks. 36600b57cec5SDimitry Andric // First thread in allocates the task team atomically. 
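// Only the thread that actually allocated or grew the threads array (the one
// for which __kmp_realloc_task_threads_data() returns TRUE below) performs
// the wake-ups; any other thread returns as soon as it sees the array is
// already set up.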
36610b57cec5SDimitry Andric static void __kmp_enable_tasking(kmp_task_team_t *task_team,
36620b57cec5SDimitry Andric                                  kmp_info_t *this_thr) {
36630b57cec5SDimitry Andric   kmp_thread_data_t *threads_data;
36640b57cec5SDimitry Andric   int nthreads, i, is_init_thread;
36650b57cec5SDimitry Andric
36660b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
36670b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr)));
36680b57cec5SDimitry Andric
36690b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_team != NULL);
36700b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
36710b57cec5SDimitry Andric
36720b57cec5SDimitry Andric   nthreads = task_team->tt.tt_nproc;
36730b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(nthreads > 0);
36740b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
36750b57cec5SDimitry Andric
36760b57cec5SDimitry Andric   // Allocate or increase the size of threads_data if necessary
36770b57cec5SDimitry Andric   is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
36780b57cec5SDimitry Andric
36790b57cec5SDimitry Andric   if (!is_init_thread) {
36800b57cec5SDimitry Andric     // Some other thread already set up the array.
36810b57cec5SDimitry Andric     KA_TRACE(
36820b57cec5SDimitry Andric         20,
36830b57cec5SDimitry Andric         ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
36840b57cec5SDimitry Andric          __kmp_gtid_from_thread(this_thr)));
36850b57cec5SDimitry Andric     return;
36860b57cec5SDimitry Andric   }
36870b57cec5SDimitry Andric   threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
36880b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(threads_data != NULL);
36890b57cec5SDimitry Andric
36900b57cec5SDimitry Andric   if (__kmp_tasking_mode == tskm_task_teams &&
36910b57cec5SDimitry Andric       (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
36920b57cec5SDimitry Andric     // Release any threads sleeping at the barrier, so that they can steal
36930b57cec5SDimitry Andric     // tasks and execute them. In extra barrier mode, threads do not sleep
36940b57cec5SDimitry Andric     // at the separate tasking barrier, so this isn't a problem.
36950b57cec5SDimitry Andric     for (i = 0; i < nthreads; i++) {
3696349cc55cSDimitry Andric       void *sleep_loc;
36970b57cec5SDimitry Andric       kmp_info_t *thread = threads_data[i].td.td_thr;
36980b57cec5SDimitry Andric
36990b57cec5SDimitry Andric       if (i == this_thr->th.th_info.ds.ds_tid) {
37000b57cec5SDimitry Andric         continue;
37010b57cec5SDimitry Andric       }
37020b57cec5SDimitry Andric       // Since we haven't locked the thread's suspend mutex lock at this
37030b57cec5SDimitry Andric       // point, there is a small window where a thread might be putting
37040b57cec5SDimitry Andric       // itself to sleep, but hasn't set the th_sleep_loc field yet.
37050b57cec5SDimitry Andric       // To work around this, __kmp_execute_tasks_template() periodically
37060b57cec5SDimitry Andric       // checks to see if other threads are sleeping (using the same random
37070b57cec5SDimitry Andric       // mechanism that is used for task stealing) and awakens them if they are.
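      // A minimal illustrative sketch (not part of the library) of the
      // guarded wake-up performed below: take a racy snapshot of the
      // published sleep location and resume the thread only if one was seen:
      //   void *loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc));
      //   if (loc != NULL)
      //     __kmp_null_resume_wrapper(thread); // harmless if already awake
      // A thread that publishes th_sleep_loc after this snapshot is caught
      // later by the periodic re-check described in the comment above.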
37080b57cec5SDimitry Andric       if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
37090b57cec5SDimitry Andric           NULL) {
37100b57cec5SDimitry Andric         KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
37110b57cec5SDimitry Andric                       __kmp_gtid_from_thread(this_thr),
37120b57cec5SDimitry Andric                       __kmp_gtid_from_thread(thread)));
3713349cc55cSDimitry Andric         __kmp_null_resume_wrapper(thread);
37140b57cec5SDimitry Andric       } else {
37150b57cec5SDimitry Andric         KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
37160b57cec5SDimitry Andric                       __kmp_gtid_from_thread(this_thr),
37170b57cec5SDimitry Andric                       __kmp_gtid_from_thread(thread)));
37180b57cec5SDimitry Andric       }
37190b57cec5SDimitry Andric     }
37200b57cec5SDimitry Andric   }
37210b57cec5SDimitry Andric
37220b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
37230b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr)));
37240b57cec5SDimitry Andric }
37250b57cec5SDimitry Andric
37260b57cec5SDimitry Andric /*
37270b57cec5SDimitry Andric  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
37280b57cec5SDimitry Andric  * like a shadow of the kmp_team_t data struct, with a different lifetime.
37290b57cec5SDimitry Andric  * After a child thread checks into a barrier and calls __kmp_release() from
37300b57cec5SDimitry Andric  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
37310b57cec5SDimitry Andric  * longer assume that the kmp_team_t structure is intact (at any moment, the
3732fe6060f1SDimitry Andric  * primary thread may exit the barrier code and free the team data structure,
37330b57cec5SDimitry Andric  * and return the threads to the thread pool).
37340b57cec5SDimitry Andric  *
3735480093f4SDimitry Andric  * This does not work with the tasking code, as the thread is still
37360b57cec5SDimitry Andric  * expected to participate in the execution of any tasks that may have been
37370b57cec5SDimitry Andric  * spawned by a member of the team, and the thread still needs access to
37380b57cec5SDimitry Andric  * each thread in the team, so that it can steal work from it.
37390b57cec5SDimitry Andric  *
37400b57cec5SDimitry Andric  * Enter the existence of the kmp_task_team_t struct. It employs a reference
3741fe6060f1SDimitry Andric  * counting mechanism, and is allocated by the primary thread before calling
37420b57cec5SDimitry Andric  * __kmp_<barrier_kind>_release, and then is released by the last thread to
37430b57cec5SDimitry Andric  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
37440b57cec5SDimitry Andric  * of the kmp_task_team_t structs for consecutive barriers can overlap
3745fe6060f1SDimitry Andric  * (and will, unless the primary thread is the last thread to exit the barrier
37460b57cec5SDimitry Andric  * release phase, which is not typical). The existence of such a struct could
37470b57cec5SDimitry Andric  * also be useful outside the context of tasking.
37480b57cec5SDimitry Andric  *
37490b57cec5SDimitry Andric  * We currently use the existence of the threads array as an indicator that
37500b57cec5SDimitry Andric  * tasks were spawned since the last barrier. If the structure is to be
37510b57cec5SDimitry Andric  * useful outside the context of tasking, then this will have to change, but
37525ffd83dbSDimitry Andric  * not setting the field minimizes the performance impact of tasking on
37530b57cec5SDimitry Andric  * barriers, when no explicit tasks were spawned (pushed, actually).
37540b57cec5SDimitry Andric  */
37550b57cec5SDimitry Andric
37560b57cec5SDimitry Andric static kmp_task_team_t *__kmp_free_task_teams =
37570b57cec5SDimitry Andric     NULL; // Free list for task_team data structures
37580b57cec5SDimitry Andric // Lock for task team data structures
37590b57cec5SDimitry Andric kmp_bootstrap_lock_t __kmp_task_team_lock =
37600b57cec5SDimitry Andric     KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
37610b57cec5SDimitry Andric
37620b57cec5SDimitry Andric // __kmp_alloc_task_deque:
37630b57cec5SDimitry Andric // Allocates a task deque for a particular thread, and initializes the
37640b57cec5SDimitry Andric // necessary data structures relating to the deque. This only happens once
37650b57cec5SDimitry Andric // per thread per task team since task teams are recycled. No lock is needed
37660b57cec5SDimitry Andric // during allocation since each thread allocates its own deque.
37670b57cec5SDimitry Andric static void __kmp_alloc_task_deque(kmp_info_t *thread,
37680b57cec5SDimitry Andric                                    kmp_thread_data_t *thread_data) {
37690b57cec5SDimitry Andric   __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
37700b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
37710b57cec5SDimitry Andric
37720b57cec5SDimitry Andric   // Initialize last stolen task field to "none"
37730b57cec5SDimitry Andric   thread_data->td.td_deque_last_stolen = -1;
37740b57cec5SDimitry Andric
37750b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
37760b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
37770b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
37780b57cec5SDimitry Andric
37790b57cec5SDimitry Andric   KE_TRACE(
37800b57cec5SDimitry Andric       10,
37810b57cec5SDimitry Andric       ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
37820b57cec5SDimitry Andric        __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
37830b57cec5SDimitry Andric   // Allocate space for task deque, and zero the deque
37840b57cec5SDimitry Andric   // Cannot use __kmp_thread_calloc() because threads not around for
37850b57cec5SDimitry Andric   // kmp_reap_task_team( ).
37860b57cec5SDimitry Andric   thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
37870b57cec5SDimitry Andric       INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
37880b57cec5SDimitry Andric   thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
37890b57cec5SDimitry Andric }
37900b57cec5SDimitry Andric
37910b57cec5SDimitry Andric // __kmp_free_task_deque:
37920b57cec5SDimitry Andric // Deallocates a task deque for a particular thread. Happens at library
37930b57cec5SDimitry Andric // deallocation so we don't need to reset all thread data fields.
37940b57cec5SDimitry Andric static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) { 37950b57cec5SDimitry Andric if (thread_data->td.td_deque != NULL) { 37960b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); 37970b57cec5SDimitry Andric TCW_4(thread_data->td.td_deque_ntasks, 0); 37980b57cec5SDimitry Andric __kmp_free(thread_data->td.td_deque); 37990b57cec5SDimitry Andric thread_data->td.td_deque = NULL; 38000b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); 38010b57cec5SDimitry Andric } 38020b57cec5SDimitry Andric 38030b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK 38040b57cec5SDimitry Andric // GEH: Figure out what to do here for td_susp_tied_tasks 38050b57cec5SDimitry Andric if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) { 38060b57cec5SDimitry Andric __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data); 38070b57cec5SDimitry Andric } 38080b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK 38090b57cec5SDimitry Andric } 38100b57cec5SDimitry Andric 38110b57cec5SDimitry Andric // __kmp_realloc_task_threads_data: 38120b57cec5SDimitry Andric // Allocates a threads_data array for a task team, either by allocating an 38130b57cec5SDimitry Andric // initial array or enlarging an existing array. Only the first thread to get 3814480093f4SDimitry Andric // the lock allocs or enlarges the array and re-initializes the array elements. 38150b57cec5SDimitry Andric // That thread returns "TRUE", the rest return "FALSE". 38160b57cec5SDimitry Andric // Assumes that the new array size is given by task_team -> tt.tt_nproc. 38170b57cec5SDimitry Andric // The current size is given by task_team -> tt.tt_max_threads. 38180b57cec5SDimitry Andric static int __kmp_realloc_task_threads_data(kmp_info_t *thread, 38190b57cec5SDimitry Andric kmp_task_team_t *task_team) { 38200b57cec5SDimitry Andric kmp_thread_data_t **threads_data_p; 38210b57cec5SDimitry Andric kmp_int32 nthreads, maxthreads; 38220b57cec5SDimitry Andric int is_init_thread = FALSE; 38230b57cec5SDimitry Andric 38240b57cec5SDimitry Andric if (TCR_4(task_team->tt.tt_found_tasks)) { 38250b57cec5SDimitry Andric // Already reallocated and initialized. 38260b57cec5SDimitry Andric return FALSE; 38270b57cec5SDimitry Andric } 38280b57cec5SDimitry Andric 38290b57cec5SDimitry Andric threads_data_p = &task_team->tt.tt_threads_data; 38300b57cec5SDimitry Andric nthreads = task_team->tt.tt_nproc; 38310b57cec5SDimitry Andric maxthreads = task_team->tt.tt_max_threads; 38320b57cec5SDimitry Andric 38330b57cec5SDimitry Andric // All threads must lock when they encounter the first task of the implicit 38340b57cec5SDimitry Andric // task region to make sure threads_data fields are (re)initialized before 38350b57cec5SDimitry Andric // used. 
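  // The sequence below is a classic double-checked initialization: a cheap
  // racy test of tt_found_tasks, then the lock, then an authoritative
  // re-test under the lock. A minimal sketch of the shape (illustrative
  // only, not part of the library):
  //   if (!TCR_4(task_team->tt.tt_found_tasks)) {
  //     __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  //     if (!TCR_4(task_team->tt.tt_found_tasks)) {
  //       /* allocate or enlarge threads_data, initialize entries */
  //       KMP_MB(); // order the init stores before publishing the flag
  //       TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  //     }
  //     __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  //   }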
38360b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock); 38370b57cec5SDimitry Andric 38380b57cec5SDimitry Andric if (!TCR_4(task_team->tt.tt_found_tasks)) { 38390b57cec5SDimitry Andric // first thread to enable tasking 38400b57cec5SDimitry Andric kmp_team_t *team = thread->th.th_team; 38410b57cec5SDimitry Andric int i; 38420b57cec5SDimitry Andric 38430b57cec5SDimitry Andric is_init_thread = TRUE; 38440b57cec5SDimitry Andric if (maxthreads < nthreads) { 38450b57cec5SDimitry Andric 38460b57cec5SDimitry Andric if (*threads_data_p != NULL) { 38470b57cec5SDimitry Andric kmp_thread_data_t *old_data = *threads_data_p; 38480b57cec5SDimitry Andric kmp_thread_data_t *new_data = NULL; 38490b57cec5SDimitry Andric 38500b57cec5SDimitry Andric KE_TRACE( 38510b57cec5SDimitry Andric 10, 38520b57cec5SDimitry Andric ("__kmp_realloc_task_threads_data: T#%d reallocating " 38530b57cec5SDimitry Andric "threads data for task_team %p, new_size = %d, old_size = %d\n", 38540b57cec5SDimitry Andric __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads)); 38550b57cec5SDimitry Andric // Reallocate threads_data to have more elements than current array 38560b57cec5SDimitry Andric // Cannot use __kmp_thread_realloc() because threads not around for 38570b57cec5SDimitry Andric // kmp_reap_task_team( ). Note all new array entries are initialized 38580b57cec5SDimitry Andric // to zero by __kmp_allocate(). 38590b57cec5SDimitry Andric new_data = (kmp_thread_data_t *)__kmp_allocate( 38600b57cec5SDimitry Andric nthreads * sizeof(kmp_thread_data_t)); 38610b57cec5SDimitry Andric // copy old data to new data 38620b57cec5SDimitry Andric KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t), 38630b57cec5SDimitry Andric (void *)old_data, maxthreads * sizeof(kmp_thread_data_t)); 38640b57cec5SDimitry Andric 38650b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK 38660b57cec5SDimitry Andric // GEH: Figure out if this is the right thing to do 38670b57cec5SDimitry Andric for (i = maxthreads; i < nthreads; i++) { 38680b57cec5SDimitry Andric kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; 38690b57cec5SDimitry Andric __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); 38700b57cec5SDimitry Andric } 38710b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK 38720b57cec5SDimitry Andric // Install the new data and free the old data 38730b57cec5SDimitry Andric (*threads_data_p) = new_data; 38740b57cec5SDimitry Andric __kmp_free(old_data); 38750b57cec5SDimitry Andric } else { 38760b57cec5SDimitry Andric KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating " 38770b57cec5SDimitry Andric "threads data for task_team %p, size = %d\n", 38780b57cec5SDimitry Andric __kmp_gtid_from_thread(thread), task_team, nthreads)); 38790b57cec5SDimitry Andric // Make the initial allocate for threads_data array, and zero entries 38800b57cec5SDimitry Andric // Cannot use __kmp_thread_calloc() because threads not around for 38810b57cec5SDimitry Andric // kmp_reap_task_team( ). 
38820b57cec5SDimitry Andric *threads_data_p = (kmp_thread_data_t *)__kmp_allocate( 38830b57cec5SDimitry Andric nthreads * sizeof(kmp_thread_data_t)); 38840b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK 38850b57cec5SDimitry Andric // GEH: Figure out if this is the right thing to do 38860b57cec5SDimitry Andric for (i = 0; i < nthreads; i++) { 38870b57cec5SDimitry Andric kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; 38880b57cec5SDimitry Andric __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); 38890b57cec5SDimitry Andric } 38900b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK 38910b57cec5SDimitry Andric } 38920b57cec5SDimitry Andric task_team->tt.tt_max_threads = nthreads; 38930b57cec5SDimitry Andric } else { 38940b57cec5SDimitry Andric // If array has (more than) enough elements, go ahead and use it 38950b57cec5SDimitry Andric KMP_DEBUG_ASSERT(*threads_data_p != NULL); 38960b57cec5SDimitry Andric } 38970b57cec5SDimitry Andric 38980b57cec5SDimitry Andric // initialize threads_data pointers back to thread_info structures 38990b57cec5SDimitry Andric for (i = 0; i < nthreads; i++) { 39000b57cec5SDimitry Andric kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; 39010b57cec5SDimitry Andric thread_data->td.td_thr = team->t.t_threads[i]; 39020b57cec5SDimitry Andric 39030b57cec5SDimitry Andric if (thread_data->td.td_deque_last_stolen >= nthreads) { 39040b57cec5SDimitry Andric // The last stolen field survives across teams / barrier, and the number 39050b57cec5SDimitry Andric // of threads may have changed. It's possible (likely?) that a new 39060b57cec5SDimitry Andric // parallel region will exhibit the same behavior as previous region. 39070b57cec5SDimitry Andric thread_data->td.td_deque_last_stolen = -1; 39080b57cec5SDimitry Andric } 39090b57cec5SDimitry Andric } 39100b57cec5SDimitry Andric 39110b57cec5SDimitry Andric KMP_MB(); 39120b57cec5SDimitry Andric TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE); 39130b57cec5SDimitry Andric } 39140b57cec5SDimitry Andric 39150b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock); 39160b57cec5SDimitry Andric return is_init_thread; 39170b57cec5SDimitry Andric } 39180b57cec5SDimitry Andric 39190b57cec5SDimitry Andric // __kmp_free_task_threads_data: 39200b57cec5SDimitry Andric // Deallocates a threads_data array for a task team, including any attached 39210b57cec5SDimitry Andric // tasking deques. Only occurs at library shutdown. 39220b57cec5SDimitry Andric static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) { 39230b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock); 39240b57cec5SDimitry Andric if (task_team->tt.tt_threads_data != NULL) { 39250b57cec5SDimitry Andric int i; 39260b57cec5SDimitry Andric for (i = 0; i < task_team->tt.tt_max_threads; i++) { 39270b57cec5SDimitry Andric __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]); 39280b57cec5SDimitry Andric } 39290b57cec5SDimitry Andric __kmp_free(task_team->tt.tt_threads_data); 39300b57cec5SDimitry Andric task_team->tt.tt_threads_data = NULL; 39310b57cec5SDimitry Andric } 39320b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock); 39330b57cec5SDimitry Andric } 39340b57cec5SDimitry Andric 393581ad6265SDimitry Andric // __kmp_free_task_pri_list: 393681ad6265SDimitry Andric // Deallocates tasking deques used for priority tasks. 393781ad6265SDimitry Andric // Only occurs at library shutdown. 
393881ad6265SDimitry Andric static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
393981ad6265SDimitry Andric   __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
394081ad6265SDimitry Andric   if (task_team->tt.tt_task_pri_list != NULL) {
394181ad6265SDimitry Andric     kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
394281ad6265SDimitry Andric     while (list != NULL) {
394381ad6265SDimitry Andric       kmp_task_pri_t *next = list->next;
394481ad6265SDimitry Andric       __kmp_free_task_deque(&list->td);
394581ad6265SDimitry Andric       __kmp_free(list);
394681ad6265SDimitry Andric       list = next;
394781ad6265SDimitry Andric     }
394881ad6265SDimitry Andric     task_team->tt.tt_task_pri_list = NULL;
394981ad6265SDimitry Andric   }
395081ad6265SDimitry Andric   __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
395181ad6265SDimitry Andric }
395281ad6265SDimitry Andric
3953*0fca6ea1SDimitry Andric static inline void __kmp_task_team_init(kmp_task_team_t *task_team,
3954*0fca6ea1SDimitry Andric                                         kmp_team_t *team) {
3955*0fca6ea1SDimitry Andric   int team_nth = team->t.t_nproc;
3956*0fca6ea1SDimitry Andric   // Only need to init if task team isn't active or team size changed
3957*0fca6ea1SDimitry Andric   if (!task_team->tt.tt_active || team_nth != task_team->tt.tt_nproc) {
3958*0fca6ea1SDimitry Andric     TCW_4(task_team->tt.tt_found_tasks, FALSE);
3959*0fca6ea1SDimitry Andric     TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3960*0fca6ea1SDimitry Andric     TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3961*0fca6ea1SDimitry Andric     TCW_4(task_team->tt.tt_nproc, team_nth);
3962*0fca6ea1SDimitry Andric     KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team_nth);
3963*0fca6ea1SDimitry Andric     TCW_4(task_team->tt.tt_active, TRUE);
3964*0fca6ea1SDimitry Andric   }
3965*0fca6ea1SDimitry Andric }
3966*0fca6ea1SDimitry Andric
39670b57cec5SDimitry Andric // __kmp_allocate_task_team:
39680b57cec5SDimitry Andric // Allocates a task team associated with a specific team, taking it from
39690b57cec5SDimitry Andric // the global task team free list if possible. Also initializes data
39700b57cec5SDimitry Andric // structures.
39710b57cec5SDimitry Andric static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
39720b57cec5SDimitry Andric                                                  kmp_team_t *team) {
39730b57cec5SDimitry Andric   kmp_task_team_t *task_team = NULL;
39740b57cec5SDimitry Andric
39750b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
39760b57cec5SDimitry Andric                 (thread ?
__kmp_gtid_from_thread(thread) : -1), team));
39770b57cec5SDimitry Andric
39780b57cec5SDimitry Andric   if (TCR_PTR(__kmp_free_task_teams) != NULL) {
39790b57cec5SDimitry Andric     // Take a task team from the task team pool
39800b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
39810b57cec5SDimitry Andric     if (__kmp_free_task_teams != NULL) {
39820b57cec5SDimitry Andric       task_team = __kmp_free_task_teams;
39830b57cec5SDimitry Andric       TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
39840b57cec5SDimitry Andric       task_team->tt.tt_next = NULL;
39850b57cec5SDimitry Andric     }
39860b57cec5SDimitry Andric     __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
39870b57cec5SDimitry Andric   }
39880b57cec5SDimitry Andric
39890b57cec5SDimitry Andric   if (task_team == NULL) {
39900b57cec5SDimitry Andric     KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
39910b57cec5SDimitry Andric                   "task team for team %p\n",
39920b57cec5SDimitry Andric                   __kmp_gtid_from_thread(thread), team));
3993e8d8bef9SDimitry Andric     // Allocate a new task team if one is not available. Cannot use
3994e8d8bef9SDimitry Andric     // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
39950b57cec5SDimitry Andric     task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
39960b57cec5SDimitry Andric     __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
399781ad6265SDimitry Andric     __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3998e8d8bef9SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3999e8d8bef9SDimitry Andric     // Suppress race condition detection on synchronization flags in debug
4000e8d8bef9SDimitry Andric     // mode; this helps to analyze library internals, eliminating false positives
4001e8d8bef9SDimitry Andric     __itt_suppress_mark_range(
4002e8d8bef9SDimitry Andric         __itt_suppress_range, __itt_suppress_threading_errors,
4003e8d8bef9SDimitry Andric         &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
4004e8d8bef9SDimitry Andric     __itt_suppress_mark_range(__itt_suppress_range,
4005e8d8bef9SDimitry Andric                               __itt_suppress_threading_errors,
4006e8d8bef9SDimitry Andric                               CCAST(kmp_uint32 *, &task_team->tt.tt_active),
4007e8d8bef9SDimitry Andric                               sizeof(task_team->tt.tt_active));
4008e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4009e8d8bef9SDimitry Andric     // Note: __kmp_allocate zeroes returned memory, otherwise we would need:
40100b57cec5SDimitry Andric     // task_team->tt.tt_threads_data = NULL;
40110b57cec5SDimitry Andric     // task_team->tt.tt_max_threads = 0;
40120b57cec5SDimitry Andric     // task_team->tt.tt_next = NULL;
40130b57cec5SDimitry Andric   }
40140b57cec5SDimitry Andric
4015*0fca6ea1SDimitry Andric   __kmp_task_team_init(task_team, team);
40160b57cec5SDimitry Andric
40170b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
40180b57cec5SDimitry Andric                 "unfinished_threads init'd to %d\n",
40190b57cec5SDimitry Andric                 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
40200b57cec5SDimitry Andric                 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
40210b57cec5SDimitry Andric   return task_team;
40220b57cec5SDimitry Andric }
40230b57cec5SDimitry Andric
40240b57cec5SDimitry Andric // __kmp_free_task_team:
40250b57cec5SDimitry Andric // Frees the task team associated with a specific thread, and adds it
40260b57cec5SDimitry Andric // to the global task team free list.
40270b57cec5SDimitry Andric void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) { 40280b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n", 40290b57cec5SDimitry Andric thread ? __kmp_gtid_from_thread(thread) : -1, task_team)); 40300b57cec5SDimitry Andric 40310b57cec5SDimitry Andric // Put task team back on free list 40320b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock); 40330b57cec5SDimitry Andric 40340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL); 40350b57cec5SDimitry Andric task_team->tt.tt_next = __kmp_free_task_teams; 40360b57cec5SDimitry Andric TCW_PTR(__kmp_free_task_teams, task_team); 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_task_team_lock); 40390b57cec5SDimitry Andric } 40400b57cec5SDimitry Andric 40410b57cec5SDimitry Andric // __kmp_reap_task_teams: 40420b57cec5SDimitry Andric // Free all the task teams on the task team free list. 40430b57cec5SDimitry Andric // Should only be done during library shutdown. 40440b57cec5SDimitry Andric // Cannot do anything that needs a thread structure or gtid since they are 40450b57cec5SDimitry Andric // already gone. 40460b57cec5SDimitry Andric void __kmp_reap_task_teams(void) { 40470b57cec5SDimitry Andric kmp_task_team_t *task_team; 40480b57cec5SDimitry Andric 40490b57cec5SDimitry Andric if (TCR_PTR(__kmp_free_task_teams) != NULL) { 40500b57cec5SDimitry Andric // Free all task_teams on the free list 40510b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock); 40520b57cec5SDimitry Andric while ((task_team = __kmp_free_task_teams) != NULL) { 40530b57cec5SDimitry Andric __kmp_free_task_teams = task_team->tt.tt_next; 40540b57cec5SDimitry Andric task_team->tt.tt_next = NULL; 40550b57cec5SDimitry Andric 40560b57cec5SDimitry Andric // Free threads_data if necessary 40570b57cec5SDimitry Andric if (task_team->tt.tt_threads_data != NULL) { 40580b57cec5SDimitry Andric __kmp_free_task_threads_data(task_team); 40590b57cec5SDimitry Andric } 406081ad6265SDimitry Andric if (task_team->tt.tt_task_pri_list != NULL) { 406181ad6265SDimitry Andric __kmp_free_task_pri_list(task_team); 406281ad6265SDimitry Andric } 40630b57cec5SDimitry Andric __kmp_free(task_team); 40640b57cec5SDimitry Andric } 40650b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_task_team_lock); 40660b57cec5SDimitry Andric } 40670b57cec5SDimitry Andric } 40680b57cec5SDimitry Andric 4069*0fca6ea1SDimitry Andric // View the array of two task team pointers as a pair of pointers: 4070*0fca6ea1SDimitry Andric // 1) a single task_team pointer 4071*0fca6ea1SDimitry Andric // 2) next pointer for stack 4072*0fca6ea1SDimitry Andric // Serial teams can create a stack of task teams for nested serial teams. 
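// A minimal sketch of the aliasing described above (illustrative only; the
// field names follow their use in the functions below, where the actual
// kmp_task_team_list_t type is defined elsewhere in the runtime):
//   struct kmp_task_team_list_t {
//     kmp_task_team_t *task_team;     // overlays t_task_team[0]
//     kmp_task_team_list_t *next;     // overlays t_task_team[1], stack link
//   };
// Pushing saves the current pair into a heap-allocated node linked through
// "next"; popping frees the current task team and restores the saved pair.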
4073*0fca6ea1SDimitry Andric void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
4074*0fca6ea1SDimitry Andric   KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4075*0fca6ea1SDimitry Andric   kmp_task_team_list_t *current =
4076*0fca6ea1SDimitry Andric       (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
4077*0fca6ea1SDimitry Andric   kmp_task_team_list_t *node =
4078*0fca6ea1SDimitry Andric       (kmp_task_team_list_t *)__kmp_allocate(sizeof(kmp_task_team_list_t));
4079*0fca6ea1SDimitry Andric   node->task_team = current->task_team;
4080*0fca6ea1SDimitry Andric   node->next = current->next;
4081*0fca6ea1SDimitry Andric   thread->th.th_task_team = current->task_team = NULL;
4082*0fca6ea1SDimitry Andric   current->next = node;
4083*0fca6ea1SDimitry Andric }
4084*0fca6ea1SDimitry Andric
4085*0fca6ea1SDimitry Andric // Serial team pops a task team off the stack
4086*0fca6ea1SDimitry Andric void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
4087*0fca6ea1SDimitry Andric   KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4088*0fca6ea1SDimitry Andric   kmp_task_team_list_t *current =
4089*0fca6ea1SDimitry Andric       (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
4090*0fca6ea1SDimitry Andric   if (current->task_team) {
4091*0fca6ea1SDimitry Andric     __kmp_free_task_team(thread, current->task_team);
4092*0fca6ea1SDimitry Andric   }
4093*0fca6ea1SDimitry Andric   kmp_task_team_list_t *next = current->next;
4094*0fca6ea1SDimitry Andric   if (next) {
4095*0fca6ea1SDimitry Andric     current->task_team = next->task_team;
4096*0fca6ea1SDimitry Andric     current->next = next->next;
4097*0fca6ea1SDimitry Andric     KMP_DEBUG_ASSERT(next != current);
4098*0fca6ea1SDimitry Andric     __kmp_free(next);
4099*0fca6ea1SDimitry Andric     thread->th.th_task_team = current->task_team;
4100*0fca6ea1SDimitry Andric   }
4101*0fca6ea1SDimitry Andric }
4102*0fca6ea1SDimitry Andric
41030b57cec5SDimitry Andric // __kmp_wait_to_unref_task_teams:
41040b57cec5SDimitry Andric // Some threads could still be in the fork barrier release code, possibly
41050b57cec5SDimitry Andric // trying to steal tasks. Wait for each thread to unreference its task team.
41060b57cec5SDimitry Andric void __kmp_wait_to_unref_task_teams(void) {
41070b57cec5SDimitry Andric   kmp_info_t *thread;
41080b57cec5SDimitry Andric   kmp_uint32 spins;
410904eeddc0SDimitry Andric   kmp_uint64 time;
41100b57cec5SDimitry Andric   int done;
41110b57cec5SDimitry Andric
41120b57cec5SDimitry Andric   KMP_INIT_YIELD(spins);
411304eeddc0SDimitry Andric   KMP_INIT_BACKOFF(time);
41140b57cec5SDimitry Andric
41150b57cec5SDimitry Andric   for (;;) {
41160b57cec5SDimitry Andric     done = TRUE;
41170b57cec5SDimitry Andric
41180b57cec5SDimitry Andric     // TODO: GEH - this may be wrong because some sync would be necessary
41190b57cec5SDimitry Andric     // in case threads are added to the pool during the traversal. Need to
41200b57cec5SDimitry Andric     // verify that lock for thread pool is held when calling this routine.
41210b57cec5SDimitry Andric for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL; 41220b57cec5SDimitry Andric thread = thread->th.th_next_pool) { 41230b57cec5SDimitry Andric #if KMP_OS_WINDOWS 41240b57cec5SDimitry Andric DWORD exit_val; 41250b57cec5SDimitry Andric #endif 41260b57cec5SDimitry Andric if (TCR_PTR(thread->th.th_task_team) == NULL) { 41270b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n", 41280b57cec5SDimitry Andric __kmp_gtid_from_thread(thread))); 41290b57cec5SDimitry Andric continue; 41300b57cec5SDimitry Andric } 41310b57cec5SDimitry Andric #if KMP_OS_WINDOWS 41320b57cec5SDimitry Andric // TODO: GEH - add this check for Linux* OS / OS X* as well? 41330b57cec5SDimitry Andric if (!__kmp_is_thread_alive(thread, &exit_val)) { 41340b57cec5SDimitry Andric thread->th.th_task_team = NULL; 41350b57cec5SDimitry Andric continue; 41360b57cec5SDimitry Andric } 41370b57cec5SDimitry Andric #endif 41380b57cec5SDimitry Andric 41390b57cec5SDimitry Andric done = FALSE; // Because th_task_team pointer is not NULL for this thread 41400b57cec5SDimitry Andric 41410b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to " 41420b57cec5SDimitry Andric "unreference task_team\n", 41430b57cec5SDimitry Andric __kmp_gtid_from_thread(thread))); 41440b57cec5SDimitry Andric 41450b57cec5SDimitry Andric if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { 4146349cc55cSDimitry Andric void *sleep_loc; 41470b57cec5SDimitry Andric // If the thread is sleeping, awaken it. 41480b57cec5SDimitry Andric if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) != 41490b57cec5SDimitry Andric NULL) { 41500b57cec5SDimitry Andric KA_TRACE( 41510b57cec5SDimitry Andric 10, 41520b57cec5SDimitry Andric ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", 41530b57cec5SDimitry Andric __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread))); 4154349cc55cSDimitry Andric __kmp_null_resume_wrapper(thread); 41550b57cec5SDimitry Andric } 41560b57cec5SDimitry Andric } 41570b57cec5SDimitry Andric } 41580b57cec5SDimitry Andric if (done) { 41590b57cec5SDimitry Andric break; 41600b57cec5SDimitry Andric } 41610b57cec5SDimitry Andric 41620b57cec5SDimitry Andric // If oversubscribed or have waited a bit, yield. 416304eeddc0SDimitry Andric KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); 41640b57cec5SDimitry Andric } 41650b57cec5SDimitry Andric } 41660b57cec5SDimitry Andric 41670b57cec5SDimitry Andric // __kmp_task_team_setup: Create a task_team for the current team, but use 41680b57cec5SDimitry Andric // an already created, unused one if it already exists. 4169*0fca6ea1SDimitry Andric void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team) { 41700b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); 41710b57cec5SDimitry Andric 4172*0fca6ea1SDimitry Andric // For the serial and root teams, setup the first task team pointer to point 4173*0fca6ea1SDimitry Andric // to task team. The other pointer is a stack of task teams from previous 4174*0fca6ea1SDimitry Andric // serial levels. 
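  // Illustrative example (not part of the library): with two nested serial
  // regions active, slot [0] holds the task team of the current serial
  // level, and the node chain built by __kmp_push_task_team_node() above
  // holds the outer levels:
  //   t_task_team[0] -> task team of the innermost serial level
  //   t_task_team[1] -> node{outer level} -> node{outermost level} -> NULL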
4175*0fca6ea1SDimitry Andric if (team == this_thr->th.th_serial_team || 4176*0fca6ea1SDimitry Andric team == this_thr->th.th_root->r.r_root_team) { 4177*0fca6ea1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 4178*0fca6ea1SDimitry Andric if (team->t.t_task_team[0] == NULL) { 4179*0fca6ea1SDimitry Andric team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team); 4180*0fca6ea1SDimitry Andric KA_TRACE( 4181*0fca6ea1SDimitry Andric 20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p" 4182*0fca6ea1SDimitry Andric " for serial/root team %p\n", 4183*0fca6ea1SDimitry Andric __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team)); 4184*0fca6ea1SDimitry Andric 4185*0fca6ea1SDimitry Andric } else 4186*0fca6ea1SDimitry Andric __kmp_task_team_init(team->t.t_task_team[0], team); 4187*0fca6ea1SDimitry Andric return; 4188*0fca6ea1SDimitry Andric } 4189*0fca6ea1SDimitry Andric 41900b57cec5SDimitry Andric // If this task_team hasn't been created yet, allocate it. It will be used in 41910b57cec5SDimitry Andric // the region after the next. 41920b57cec5SDimitry Andric // If it exists, it is the current task team and shouldn't be touched yet as 41930b57cec5SDimitry Andric // it may still be in use. 4194*0fca6ea1SDimitry Andric if (team->t.t_task_team[this_thr->th.th_task_state] == NULL) { 41950b57cec5SDimitry Andric team->t.t_task_team[this_thr->th.th_task_state] = 41960b57cec5SDimitry Andric __kmp_allocate_task_team(this_thr, team); 4197fe6060f1SDimitry Andric KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p" 41980b57cec5SDimitry Andric " for team %d at parity=%d\n", 41990b57cec5SDimitry Andric __kmp_gtid_from_thread(this_thr), 4200fe6060f1SDimitry Andric team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id, 42010b57cec5SDimitry Andric this_thr->th.th_task_state)); 42020b57cec5SDimitry Andric } 42030b57cec5SDimitry Andric 42040b57cec5SDimitry Andric // After threads exit the release, they will call sync, and then point to this 42050b57cec5SDimitry Andric // other task_team; make sure it is allocated and properly initialized. As 42060b57cec5SDimitry Andric // threads spin in the barrier release phase, they will continue to use the 42070b57cec5SDimitry Andric // previous task_team struct(above), until they receive the signal to stop 42080b57cec5SDimitry Andric // checking for tasks (they can't safely reference the kmp_team_t struct, 4209*0fca6ea1SDimitry Andric // which could be reallocated by the primary thread). 
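  // Sketch of the two-slot parity scheme used below (illustrative only):
  // th_task_state toggles 0 <-> 1 at each barrier, so a team alternates
  // between t_task_team[0] and t_task_team[1]:
  //   region N:  threads work out of t_task_team[state]
  //   barrier:   primary prepares t_task_team[1 - state] for region N+1
  //              while workers may still be draining t_task_team[state]
  // This is why the slot for the current state must not be reset here.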
42100b57cec5SDimitry Andric   int other_team = 1 - this_thr->th.th_task_state;
4211fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
42120b57cec5SDimitry Andric   if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
4213*0fca6ea1SDimitry Andric     team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
4214fe6060f1SDimitry Andric     KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
42150b57cec5SDimitry Andric                   "task_team %p for team %d at parity=%d\n",
42160b57cec5SDimitry Andric                   __kmp_gtid_from_thread(this_thr),
4217fe6060f1SDimitry Andric                   team->t.t_task_team[other_team], team->t.t_id, other_team));
42180b57cec5SDimitry Andric   } else { // Leave the old task team struct in place for the upcoming region;
42190b57cec5SDimitry Andric     // adjust as needed
42200b57cec5SDimitry Andric     kmp_task_team_t *task_team = team->t.t_task_team[other_team];
4221*0fca6ea1SDimitry Andric     __kmp_task_team_init(task_team, team);
42220b57cec5SDimitry Andric     // if team size has changed, the first thread to enable tasking will
42230b57cec5SDimitry Andric     // realloc threads_data if necessary
4224fe6060f1SDimitry Andric     KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
42250b57cec5SDimitry Andric                   "%p for team %d at parity=%d\n",
42260b57cec5SDimitry Andric                   __kmp_gtid_from_thread(this_thr),
4227fe6060f1SDimitry Andric                   team->t.t_task_team[other_team], team->t.t_id, other_team));
42280b57cec5SDimitry Andric   }
4229e8d8bef9SDimitry Andric
4230e8d8bef9SDimitry Andric   // For a regular thread, task enabling should be called when the task is
4231e8d8bef9SDimitry Andric   // going to be pushed to a deque. However, for the hidden helper thread, we
4232e8d8bef9SDimitry Andric   // need it ahead of time so that some operations can be performed without
4233e8d8bef9SDimitry Andric   // race conditions.
4234e8d8bef9SDimitry Andric   if (this_thr == __kmp_hidden_helper_main_thread) {
4235e8d8bef9SDimitry Andric     for (int i = 0; i < 2; ++i) {
4236e8d8bef9SDimitry Andric       kmp_task_team_t *task_team = team->t.t_task_team[i];
4237e8d8bef9SDimitry Andric       if (KMP_TASKING_ENABLED(task_team)) {
4238e8d8bef9SDimitry Andric         continue;
4239e8d8bef9SDimitry Andric       }
4240e8d8bef9SDimitry Andric       __kmp_enable_tasking(task_team, this_thr);
4241e8d8bef9SDimitry Andric       for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
4242e8d8bef9SDimitry Andric         kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
4243e8d8bef9SDimitry Andric         if (thread_data->td.td_deque == NULL) {
4244e8d8bef9SDimitry Andric           __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
4245e8d8bef9SDimitry Andric         }
4246e8d8bef9SDimitry Andric       }
4247e8d8bef9SDimitry Andric     }
4248e8d8bef9SDimitry Andric   }
42490b57cec5SDimitry Andric }
42500b57cec5SDimitry Andric
42510b57cec5SDimitry Andric // __kmp_task_team_sync: Propagation of task team data from team to threads
42520b57cec5SDimitry Andric // which happens just after the release phase of a team barrier. This may be
4253*0fca6ea1SDimitry Andric // called by any thread. This is not called for serial or root teams.
42540b57cec5SDimitry Andric void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) { 42550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); 4256*0fca6ea1SDimitry Andric KMP_DEBUG_ASSERT(team != this_thr->th.th_serial_team); 4257*0fca6ea1SDimitry Andric KMP_DEBUG_ASSERT(team != this_thr->th.th_root->r.r_root_team); 42580b57cec5SDimitry Andric 42590b57cec5SDimitry Andric // Toggle the th_task_state field, to switch which task_team this thread 42600b57cec5SDimitry Andric // refers to 4261e8d8bef9SDimitry Andric this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state); 4262e8d8bef9SDimitry Andric 42630b57cec5SDimitry Andric // It is now safe to propagate the task team pointer from the team struct to 42640b57cec5SDimitry Andric // the current thread. 42650b57cec5SDimitry Andric TCW_PTR(this_thr->th.th_task_team, 42660b57cec5SDimitry Andric team->t.t_task_team[this_thr->th.th_task_state]); 42670b57cec5SDimitry Andric KA_TRACE(20, 42680b57cec5SDimitry Andric ("__kmp_task_team_sync: Thread T#%d task team switched to task_team " 42690b57cec5SDimitry Andric "%p from Team #%d (parity=%d)\n", 42700b57cec5SDimitry Andric __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team, 4271fe6060f1SDimitry Andric team->t.t_id, this_thr->th.th_task_state)); 42720b57cec5SDimitry Andric } 42730b57cec5SDimitry Andric 4274fe6060f1SDimitry Andric // __kmp_task_team_wait: Primary thread waits for outstanding tasks after the 4275*0fca6ea1SDimitry Andric // barrier gather phase. Only called by the primary thread. 42760b57cec5SDimitry Andric // 42770b57cec5SDimitry Andric // wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off 4278fe6060f1SDimitry Andric // by passing in 0 optionally as the last argument. When wait is zero, primary 42790b57cec5SDimitry Andric // thread does not wait for unfinished_threads to reach 0. 42800b57cec5SDimitry Andric void __kmp_task_team_wait( 42810b57cec5SDimitry Andric kmp_info_t *this_thr, 42820b57cec5SDimitry Andric kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) { 42830b57cec5SDimitry Andric kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; 42840b57cec5SDimitry Andric 42850b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); 42860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team); 42870b57cec5SDimitry Andric 42880b57cec5SDimitry Andric if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) { 42890b57cec5SDimitry Andric if (wait) { 4290fe6060f1SDimitry Andric KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks " 42910b57cec5SDimitry Andric "(for unfinished_threads to reach 0) on task_team = %p\n", 42920b57cec5SDimitry Andric __kmp_gtid_from_thread(this_thr), task_team)); 42930b57cec5SDimitry Andric // Worker threads may have dropped through to release phase, but could 42940b57cec5SDimitry Andric // still be executing tasks. Wait here for tasks to complete. To avoid 4295fe6060f1SDimitry Andric // memory contention, only primary thread checks termination condition. 
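      // A minimal sketch of the wait below (illustrative only): the primary
      // thread performs a countdown-latch style wait, blocking until the
      // workers have decremented the atomic counter to zero:
      //   while (KMP_ATOMIC_LD_ACQ(&task_team->tt.tt_unfinished_threads) != 0)
      //     ; // kmp_flag_32<false, false>::wait() adds yield/sleep backoff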
4296e8d8bef9SDimitry Andric kmp_flag_32<false, false> flag( 4297e8d8bef9SDimitry Andric RCAST(std::atomic<kmp_uint32> *, 42980b57cec5SDimitry Andric &task_team->tt.tt_unfinished_threads), 42990b57cec5SDimitry Andric 0U); 43000b57cec5SDimitry Andric flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); 43010b57cec5SDimitry Andric } 43020b57cec5SDimitry Andric // Deactivate the old task team, so that the worker threads will stop 43030b57cec5SDimitry Andric // referencing it while spinning. 43040b57cec5SDimitry Andric KA_TRACE( 43050b57cec5SDimitry Andric 20, 4306fe6060f1SDimitry Andric ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: " 43070b57cec5SDimitry Andric "setting active to false, setting local and team's pointer to NULL\n", 43080b57cec5SDimitry Andric __kmp_gtid_from_thread(this_thr), task_team)); 43090b57cec5SDimitry Andric TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE); 431004eeddc0SDimitry Andric TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); 43110b57cec5SDimitry Andric KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0); 43120b57cec5SDimitry Andric TCW_SYNC_4(task_team->tt.tt_active, FALSE); 43130b57cec5SDimitry Andric KMP_MB(); 43140b57cec5SDimitry Andric 43150b57cec5SDimitry Andric TCW_PTR(this_thr->th.th_task_team, NULL); 43160b57cec5SDimitry Andric } 43170b57cec5SDimitry Andric } 43180b57cec5SDimitry Andric 43190b57cec5SDimitry Andric // __kmp_tasking_barrier: 4320e8d8bef9SDimitry Andric // This routine is called only when __kmp_tasking_mode == tskm_extra_barrier. 43210b57cec5SDimitry Andric // Internal function to execute all tasks prior to a regular barrier or a join 43220b57cec5SDimitry Andric // barrier. It is a full barrier itself, which unfortunately turns regular 43230b57cec5SDimitry Andric // barriers into double barriers and join barriers into 1 1/2 barriers. 43240b57cec5SDimitry Andric void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) { 43250b57cec5SDimitry Andric std::atomic<kmp_uint32> *spin = RCAST( 43260b57cec5SDimitry Andric std::atomic<kmp_uint32> *, 43270b57cec5SDimitry Andric &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads); 43280b57cec5SDimitry Andric int flag = FALSE; 43290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier); 43300b57cec5SDimitry Andric 43310b57cec5SDimitry Andric #if USE_ITT_BUILD 43320b57cec5SDimitry Andric KMP_FSYNC_SPIN_INIT(spin, NULL); 43330b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 4334e8d8bef9SDimitry Andric kmp_flag_32<false, false> spin_flag(spin, 0U); 43350b57cec5SDimitry Andric while (!spin_flag.execute_tasks(thread, gtid, TRUE, 43360b57cec5SDimitry Andric &flag USE_ITT_BUILD_ARG(NULL), 0)) { 43370b57cec5SDimitry Andric #if USE_ITT_BUILD 43380b57cec5SDimitry Andric // TODO: What about itt_sync_obj?? 
43390b57cec5SDimitry Andric KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin)); 43400b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 43410b57cec5SDimitry Andric 43420b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done)) { 43430b57cec5SDimitry Andric if (__kmp_global.g.g_abort) 43440b57cec5SDimitry Andric __kmp_abort_thread(); 43450b57cec5SDimitry Andric break; 43460b57cec5SDimitry Andric } 43470b57cec5SDimitry Andric KMP_YIELD(TRUE); 43480b57cec5SDimitry Andric } 43490b57cec5SDimitry Andric #if USE_ITT_BUILD 43500b57cec5SDimitry Andric KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin)); 43510b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 43520b57cec5SDimitry Andric } 43530b57cec5SDimitry Andric 43540b57cec5SDimitry Andric // __kmp_give_task puts a task into a given thread queue if: 43550b57cec5SDimitry Andric // - the queue for that thread was created 43560b57cec5SDimitry Andric // - there's space in that queue 43570b57cec5SDimitry Andric // Because of this, __kmp_push_task needs to check if there's space after 43580b57cec5SDimitry Andric // getting the lock 43590b57cec5SDimitry Andric static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task, 43600b57cec5SDimitry Andric kmp_int32 pass) { 43610b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); 43620b57cec5SDimitry Andric kmp_task_team_t *task_team = taskdata->td_task_team; 43630b57cec5SDimitry Andric 43640b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", 43650b57cec5SDimitry Andric taskdata, tid)); 43660b57cec5SDimitry Andric 43670b57cec5SDimitry Andric // If task_team is NULL something went really bad... 43680b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task_team != NULL); 43690b57cec5SDimitry Andric 43700b57cec5SDimitry Andric bool result = false; 43710b57cec5SDimitry Andric kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid]; 43720b57cec5SDimitry Andric 43730b57cec5SDimitry Andric if (thread_data->td.td_deque == NULL) { 43740b57cec5SDimitry Andric // There's no queue in this thread, go find another one 43750b57cec5SDimitry Andric // We're guaranteed that at least one thread has a queue 43760b57cec5SDimitry Andric KA_TRACE(30, 43770b57cec5SDimitry Andric ("__kmp_give_task: thread %d has no queue while giving task %p.\n", 43780b57cec5SDimitry Andric tid, taskdata)); 43790b57cec5SDimitry Andric return result; 43800b57cec5SDimitry Andric } 43810b57cec5SDimitry Andric 43820b57cec5SDimitry Andric if (TCR_4(thread_data->td.td_deque_ntasks) >= 43830b57cec5SDimitry Andric TASK_DEQUE_SIZE(thread_data->td)) { 43840b57cec5SDimitry Andric KA_TRACE( 43850b57cec5SDimitry Andric 30, 43860b57cec5SDimitry Andric ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", 43870b57cec5SDimitry Andric taskdata, tid)); 43880b57cec5SDimitry Andric 43890b57cec5SDimitry Andric // if this deque is bigger than the pass ratio give a chance to another 43900b57cec5SDimitry Andric // thread 43910b57cec5SDimitry Andric if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass) 43920b57cec5SDimitry Andric return result; 43930b57cec5SDimitry Andric 43940b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); 43955ffd83dbSDimitry Andric if (TCR_4(thread_data->td.td_deque_ntasks) >= 43965ffd83dbSDimitry Andric TASK_DEQUE_SIZE(thread_data->td)) { 43975ffd83dbSDimitry Andric // expand deque to push the task which is not allowed to execute 43980b57cec5SDimitry Andric __kmp_realloc_task_deque(thread, thread_data); 
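      // Worked example of the pass heuristic above (illustrative only),
      // assuming INITIAL_TASK_DEQUE_SIZE is 256: a deque already grown to
      // 1024 entries has ratio 1024 / 256 = 4, so __kmp_give_task refuses it
      // while the caller's pass is 1, 2, or 4, and only expands it further
      // once pass (doubled by __kmpc_give_task after each full sweep of the
      // team) reaches 8.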
43995ffd83dbSDimitry Andric     }
44000b57cec5SDimitry Andric
44010b57cec5SDimitry Andric   } else {
44020b57cec5SDimitry Andric
44030b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
44040b57cec5SDimitry Andric
44050b57cec5SDimitry Andric     if (TCR_4(thread_data->td.td_deque_ntasks) >=
44060b57cec5SDimitry Andric         TASK_DEQUE_SIZE(thread_data->td)) {
44070b57cec5SDimitry Andric       KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
44080b57cec5SDimitry Andric                     "thread %d.\n",
44090b57cec5SDimitry Andric                     taskdata, tid));
44100b57cec5SDimitry Andric
44110b57cec5SDimitry Andric       // if this deque is bigger than the pass ratio, give a chance to another
44120b57cec5SDimitry Andric       // thread
44130b57cec5SDimitry Andric       if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
44140b57cec5SDimitry Andric         goto release_and_exit;
44150b57cec5SDimitry Andric
44160b57cec5SDimitry Andric       __kmp_realloc_task_deque(thread, thread_data);
44170b57cec5SDimitry Andric     }
44180b57cec5SDimitry Andric   }
44190b57cec5SDimitry Andric
44200b57cec5SDimitry Andric   // lock is held here, and there is space in the deque
44210b57cec5SDimitry Andric
44220b57cec5SDimitry Andric   thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
44230b57cec5SDimitry Andric   // Wrap index.
44240b57cec5SDimitry Andric   thread_data->td.td_deque_tail =
44250b57cec5SDimitry Andric       (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
44260b57cec5SDimitry Andric   TCW_4(thread_data->td.td_deque_ntasks,
44270b57cec5SDimitry Andric         TCR_4(thread_data->td.td_deque_ntasks) + 1);
44280b57cec5SDimitry Andric
44290b57cec5SDimitry Andric   result = true;
44300b57cec5SDimitry Andric   KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
44310b57cec5SDimitry Andric                 taskdata, tid));
44320b57cec5SDimitry Andric
44330b57cec5SDimitry Andric release_and_exit:
44340b57cec5SDimitry Andric   __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
44350b57cec5SDimitry Andric
44360b57cec5SDimitry Andric   return result;
44370b57cec5SDimitry Andric }
44380b57cec5SDimitry Andric
4439fe6060f1SDimitry Andric #define PROXY_TASK_FLAG 0x40000000
44400b57cec5SDimitry Andric /* The finish of the proxy tasks is divided in two pieces:
44410b57cec5SDimitry Andric    - the top half is the one that can be done from a thread outside the team
44420b57cec5SDimitry Andric    - the bottom half must be run from a thread within the team
44430b57cec5SDimitry Andric
44440b57cec5SDimitry Andric    In order to run the bottom half the task gets queued back into one of the
44450b57cec5SDimitry Andric    threads of the team. Once the td_incomplete_child_tasks counter of the
44460b57cec5SDimitry Andric    parent is decremented the threads can leave the barriers. So, the bottom
44470b57cec5SDimitry Andric    half needs to be queued before the counter is decremented. The top half is
44480b57cec5SDimitry Andric    therefore divided in two parts:
44490b57cec5SDimitry Andric    - things that can be run before queuing the bottom half
44500b57cec5SDimitry Andric    - things that must be run after queuing the bottom half
44510b57cec5SDimitry Andric
44520b57cec5SDimitry Andric    This creates a second race as the bottom half can free the task before the
44530b57cec5SDimitry Andric    second top half is executed. To avoid this we use the
44540b57cec5SDimitry Andric    td_incomplete_child_tasks counter of the proxy task to synchronize the top
44550b57cec5SDimitry Andric    and bottom half.
*/
44560b57cec5SDimitry Andric static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
44570b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
44580b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
44590b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
44600b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
44610b57cec5SDimitry Andric
44620b57cec5SDimitry Andric   taskdata->td_flags.complete = 1; // mark the task as completed
446306c3fb27SDimitry Andric #if OMPX_TASKGRAPH
446406c3fb27SDimitry Andric   taskdata->td_flags.onced = 1;
446506c3fb27SDimitry Andric #endif
44660b57cec5SDimitry Andric
44670b57cec5SDimitry Andric   if (taskdata->td_taskgroup)
44680b57cec5SDimitry Andric     KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
44690b57cec5SDimitry Andric
44700b57cec5SDimitry Andric   // Create an imaginary child for this task so the bottom half cannot
44710b57cec5SDimitry Andric   // release the task before we have completed the second top half
4472fe6060f1SDimitry Andric   KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
44730b57cec5SDimitry Andric }
44740b57cec5SDimitry Andric
44750b57cec5SDimitry Andric static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4476349cc55cSDimitry Andric #if KMP_DEBUG
44770b57cec5SDimitry Andric   kmp_int32 children = 0;
44780b57cec5SDimitry Andric   // Predecrement simulated by "- 1" calculation
4479349cc55cSDimitry Andric   children = -1 +
4480349cc55cSDimitry Andric #endif
4481349cc55cSDimitry Andric       KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
44820b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(children >= 0);
44830b57cec5SDimitry Andric
44840b57cec5SDimitry Andric   // Remove the imaginary child
4485fe6060f1SDimitry Andric   KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
44860b57cec5SDimitry Andric }
44870b57cec5SDimitry Andric
44880b57cec5SDimitry Andric static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
44890b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
44900b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
44910b57cec5SDimitry Andric
44920b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
44930b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
44940b57cec5SDimitry Andric                    1); // top half must run before bottom half
44950b57cec5SDimitry Andric
44960b57cec5SDimitry Andric   // We need to wait to make sure the top half is finished
44970b57cec5SDimitry Andric   // Spinning here should be ok as this should happen quickly
4498fe6060f1SDimitry Andric   while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
4499fe6060f1SDimitry Andric           PROXY_TASK_FLAG) > 0)
45000b57cec5SDimitry Andric     ;
45010b57cec5SDimitry Andric
45020b57cec5SDimitry Andric   __kmp_release_deps(gtid, taskdata);
45030b57cec5SDimitry Andric   __kmp_free_task_and_ancestors(gtid, taskdata, thread);
45040b57cec5SDimitry Andric }
45050b57cec5SDimitry Andric
45060b57cec5SDimitry Andric /*!
45070b57cec5SDimitry Andric @ingroup TASKING
45080b57cec5SDimitry Andric @param gtid Global Thread ID of encountering thread
45090b57cec5SDimitry Andric @param ptask Task whose execution is completed
45100b57cec5SDimitry Andric
4511480093f4SDimitry Andric Execute the completion of a proxy task from a thread that is part of the
45120b57cec5SDimitry Andric team.
Both top halves and the bottom half are run directly.
45130b57cec5SDimitry Andric */
45140b57cec5SDimitry Andric void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
45150b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(ptask != NULL);
45160b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
45170b57cec5SDimitry Andric   KA_TRACE(
45180b57cec5SDimitry Andric       10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
45190b57cec5SDimitry Andric            gtid, taskdata));
4520e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
45210b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
45220b57cec5SDimitry Andric
45230b57cec5SDimitry Andric   __kmp_first_top_half_finish_proxy(taskdata);
45240b57cec5SDimitry Andric   __kmp_second_top_half_finish_proxy(taskdata);
45250b57cec5SDimitry Andric   __kmp_bottom_half_finish_proxy(gtid, ptask);
45260b57cec5SDimitry Andric
45270b57cec5SDimitry Andric   KA_TRACE(10,
45280b57cec5SDimitry Andric            ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
45290b57cec5SDimitry Andric             gtid, taskdata));
45300b57cec5SDimitry Andric }
45310b57cec5SDimitry Andric
4532fe6060f1SDimitry Andric void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
4533fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(ptask != NULL);
4534fe6060f1SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4535fe6060f1SDimitry Andric
4536fe6060f1SDimitry Andric   // Enqueue task to complete bottom half completion from a thread within the
4537fe6060f1SDimitry Andric   // corresponding team
4538fe6060f1SDimitry Andric   kmp_team_t *team = taskdata->td_team;
4539fe6060f1SDimitry Andric   kmp_int32 nthreads = team->t.t_nproc;
4540fe6060f1SDimitry Andric   kmp_info_t *thread;
4541fe6060f1SDimitry Andric
4542fe6060f1SDimitry Andric   // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
4543fe6060f1SDimitry Andric   // but we cannot use __kmp_get_random here
4544349cc55cSDimitry Andric   kmp_int32 start_k = start % nthreads;
4545fe6060f1SDimitry Andric   kmp_int32 pass = 1;
4546fe6060f1SDimitry Andric   kmp_int32 k = start_k;
4547fe6060f1SDimitry Andric
4548fe6060f1SDimitry Andric   do {
4549fe6060f1SDimitry Andric     // For now we're just linearly trying to find a thread
4550fe6060f1SDimitry Andric     thread = team->t.t_threads[k];
4551fe6060f1SDimitry Andric     k = (k + 1) % nthreads;
4552fe6060f1SDimitry Andric
4553fe6060f1SDimitry Andric     // we did a full pass through all the threads
4554fe6060f1SDimitry Andric     if (k == start_k)
4555fe6060f1SDimitry Andric       pass = pass << 1;
4556fe6060f1SDimitry Andric
4557fe6060f1SDimitry Andric   } while (!__kmp_give_task(thread, k, ptask, pass));
455881ad6265SDimitry Andric
455981ad6265SDimitry Andric   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
456081ad6265SDimitry Andric     // wake at least one thread to execute the given task
456181ad6265SDimitry Andric     for (int i = 0; i < nthreads; ++i) {
456281ad6265SDimitry Andric       thread = team->t.t_threads[i];
456381ad6265SDimitry Andric       if (thread->th.th_sleep_loc != NULL) {
456481ad6265SDimitry Andric         __kmp_null_resume_wrapper(thread);
456581ad6265SDimitry Andric         break;
456681ad6265SDimitry Andric       }
456781ad6265SDimitry Andric     }
456881ad6265SDimitry Andric   }
4569fe6060f1SDimitry Andric }
4570fe6060f1SDimitry Andric
45710b57cec5SDimitry Andric /*!
45720b57cec5SDimitry Andric @ingroup TASKING
45730b57cec5SDimitry Andric @param ptask Task whose execution is completed
45740b57cec5SDimitry Andric
4575480093f4SDimitry Andric Execute the completion of a proxy task from a thread that does not belong to
45760b57cec5SDimitry Andric the team.
45770b57cec5SDimitry Andric */
45780b57cec5SDimitry Andric void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
45790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(ptask != NULL);
45800b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
45810b57cec5SDimitry Andric
45820b57cec5SDimitry Andric KA_TRACE(
45830b57cec5SDimitry Andric 10,
45840b57cec5SDimitry Andric ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
45850b57cec5SDimitry Andric taskdata));
45860b57cec5SDimitry Andric
45870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
45880b57cec5SDimitry Andric
45890b57cec5SDimitry Andric __kmp_first_top_half_finish_proxy(taskdata);
45900b57cec5SDimitry Andric
4591fe6060f1SDimitry Andric __kmpc_give_task(ptask);
45920b57cec5SDimitry Andric
45930b57cec5SDimitry Andric __kmp_second_top_half_finish_proxy(taskdata);
45940b57cec5SDimitry Andric
45950b57cec5SDimitry Andric KA_TRACE(
45960b57cec5SDimitry Andric 10,
45970b57cec5SDimitry Andric ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
45980b57cec5SDimitry Andric taskdata));
45990b57cec5SDimitry Andric }
46000b57cec5SDimitry Andric
46010b57cec5SDimitry Andric kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
46020b57cec5SDimitry Andric kmp_task_t *task) {
46030b57cec5SDimitry Andric kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
46040b57cec5SDimitry Andric if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
46050b57cec5SDimitry Andric td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
46060b57cec5SDimitry Andric td->td_allow_completion_event.ed.task = task;
46070b57cec5SDimitry Andric __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
46080b57cec5SDimitry Andric }
46090b57cec5SDimitry Andric return &td->td_allow_completion_event;
46100b57cec5SDimitry Andric }
46110b57cec5SDimitry Andric
46120b57cec5SDimitry Andric void __kmp_fulfill_event(kmp_event_t *event) {
46130b57cec5SDimitry Andric if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
46140b57cec5SDimitry Andric kmp_task_t *ptask = event->ed.task;
46150b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
46160b57cec5SDimitry Andric bool detached = false;
46170b57cec5SDimitry Andric int gtid = __kmp_get_gtid();
46180b57cec5SDimitry Andric
46195ffd83dbSDimitry Andric // The associated task might have completed or could be completing at this
46200b57cec5SDimitry Andric // point.
46210b57cec5SDimitry Andric // We need to take the lock to avoid races
46220b57cec5SDimitry Andric __kmp_acquire_tas_lock(&event->lock, gtid);
46235ffd83dbSDimitry Andric if (taskdata->td_flags.proxy == TASK_PROXY) {
46240b57cec5SDimitry Andric detached = true;
46255ffd83dbSDimitry Andric } else {
46265ffd83dbSDimitry Andric #if OMPT_SUPPORT
46275ffd83dbSDimitry Andric // The OMPT event must occur under mutual exclusion,
46285ffd83dbSDimitry Andric // otherwise the tool might access ptask after free
46295ffd83dbSDimitry Andric if (UNLIKELY(ompt_enabled.enabled))
46305ffd83dbSDimitry Andric __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
46315ffd83dbSDimitry Andric #endif
46325ffd83dbSDimitry Andric }
46330b57cec5SDimitry Andric event->type = KMP_EVENT_UNINITIALIZED;
46340b57cec5SDimitry Andric __kmp_release_tas_lock(&event->lock, gtid);
46350b57cec5SDimitry Andric
46360b57cec5SDimitry Andric if (detached) {
46375ffd83dbSDimitry Andric #if OMPT_SUPPORT
46385ffd83dbSDimitry Andric // We free ptask afterwards and know the task is finished,
46395ffd83dbSDimitry Andric // so locking is not necessary
46405ffd83dbSDimitry Andric if (UNLIKELY(ompt_enabled.enabled))
46415ffd83dbSDimitry Andric __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
46425ffd83dbSDimitry Andric #endif
46430b57cec5SDimitry Andric // If the task detached, complete the proxy task
46440b57cec5SDimitry Andric if (gtid >= 0) {
46450b57cec5SDimitry Andric kmp_team_t *team = taskdata->td_team;
46460b57cec5SDimitry Andric kmp_info_t *thread = __kmp_get_thread();
46470b57cec5SDimitry Andric if (thread->th.th_team == team) {
46480b57cec5SDimitry Andric __kmpc_proxy_task_completed(gtid, ptask);
46490b57cec5SDimitry Andric return;
46500b57cec5SDimitry Andric }
46510b57cec5SDimitry Andric }
46520b57cec5SDimitry Andric
46530b57cec5SDimitry Andric // fallback
46540b57cec5SDimitry Andric __kmpc_proxy_task_completed_ooo(ptask);
46550b57cec5SDimitry Andric }
46560b57cec5SDimitry Andric }
46570b57cec5SDimitry Andric }
46580b57cec5SDimitry Andric
46590b57cec5SDimitry Andric // __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
46600b57cec5SDimitry Andric // for taskloop
46610b57cec5SDimitry Andric //
46620b57cec5SDimitry Andric // thread: allocating thread
46630b57cec5SDimitry Andric // task_src: pointer to source task to be duplicated
466406c3fb27SDimitry Andric // taskloop_recur: used only when dealing with taskgraph,
466506c3fb27SDimitry Andric // indicating whether we need to update task->td_task_id
46660b57cec5SDimitry Andric // returns: a pointer to the allocated kmp_task_t structure (task).
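// A hedged illustration of the layout assumption behind the copy below: the
// kmp_taskdata_t header, the kmp_task_t, and the shareds block live in one
// allocation of td_size_alloc bytes, so after KMP_MEMCPY the shareds pointer
// can be re-based purely from its byte offset:
//
//   size_t off = (char *)task_src->shareds - (char *)taskdata_src;
//   task->shareds = &((char *)taskdata)[off]; // same offset, new allocation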
466706c3fb27SDimitry Andric kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src 466806c3fb27SDimitry Andric #if OMPX_TASKGRAPH 466906c3fb27SDimitry Andric , int taskloop_recur 467006c3fb27SDimitry Andric #endif 467106c3fb27SDimitry Andric ) { 46720b57cec5SDimitry Andric kmp_task_t *task; 46730b57cec5SDimitry Andric kmp_taskdata_t *taskdata; 46745ffd83dbSDimitry Andric kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src); 46755ffd83dbSDimitry Andric kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task 46760b57cec5SDimitry Andric size_t shareds_offset; 46770b57cec5SDimitry Andric size_t task_size; 46780b57cec5SDimitry Andric 46790b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, 46800b57cec5SDimitry Andric task_src)); 46810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy == 46820b57cec5SDimitry Andric TASK_FULL); // it should not be proxy task 46830b57cec5SDimitry Andric KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT); 46840b57cec5SDimitry Andric task_size = taskdata_src->td_size_alloc; 46850b57cec5SDimitry Andric 46860b57cec5SDimitry Andric // Allocate a kmp_taskdata_t block and a kmp_task_t block. 46870b57cec5SDimitry Andric KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, 46880b57cec5SDimitry Andric task_size)); 46890b57cec5SDimitry Andric #if USE_FAST_MEMORY 46900b57cec5SDimitry Andric taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size); 46910b57cec5SDimitry Andric #else 46920b57cec5SDimitry Andric taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size); 46930b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */ 46940b57cec5SDimitry Andric KMP_MEMCPY(taskdata, taskdata_src, task_size); 46950b57cec5SDimitry Andric 46960b57cec5SDimitry Andric task = KMP_TASKDATA_TO_TASK(taskdata); 46970b57cec5SDimitry Andric 46980b57cec5SDimitry Andric // Initialize new task (only specific fields not affected by memcpy) 469906c3fb27SDimitry Andric #if OMPX_TASKGRAPH 470006c3fb27SDimitry Andric if (!taskdata->is_taskgraph || taskloop_recur) 47010b57cec5SDimitry Andric taskdata->td_task_id = KMP_GEN_TASK_ID(); 470206c3fb27SDimitry Andric else if (taskdata->is_taskgraph && 470306c3fb27SDimitry Andric __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status)) 470406c3fb27SDimitry Andric taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id); 470506c3fb27SDimitry Andric #else 470606c3fb27SDimitry Andric taskdata->td_task_id = KMP_GEN_TASK_ID(); 470706c3fb27SDimitry Andric #endif 47080b57cec5SDimitry Andric if (task->shareds != NULL) { // need setup shareds pointer 47090b57cec5SDimitry Andric shareds_offset = (char *)task_src->shareds - (char *)taskdata_src; 47100b57cec5SDimitry Andric task->shareds = &((char *)taskdata)[shareds_offset]; 47110b57cec5SDimitry Andric KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == 47120b57cec5SDimitry Andric 0); 47130b57cec5SDimitry Andric } 47140b57cec5SDimitry Andric taskdata->td_alloc_thread = thread; 47150b57cec5SDimitry Andric taskdata->td_parent = parent_task; 47165ffd83dbSDimitry Andric // task inherits the taskgroup from the parent task 47175ffd83dbSDimitry Andric taskdata->td_taskgroup = parent_task->td_taskgroup; 47185ffd83dbSDimitry Andric // tied task needs to initialize the td_last_tied at creation, 47195ffd83dbSDimitry Andric // untied one does this when it is scheduled for execution 47205ffd83dbSDimitry Andric if (taskdata->td_flags.tiedness == 
TASK_TIED) 47215ffd83dbSDimitry Andric taskdata->td_last_tied = taskdata; 47220b57cec5SDimitry Andric 47230b57cec5SDimitry Andric // Only need to keep track of child task counts if team parallel and tasking 47240b57cec5SDimitry Andric // not serialized 47250b57cec5SDimitry Andric if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { 47260b57cec5SDimitry Andric KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks); 47270b57cec5SDimitry Andric if (parent_task->td_taskgroup) 47280b57cec5SDimitry Andric KMP_ATOMIC_INC(&parent_task->td_taskgroup->count); 47290b57cec5SDimitry Andric // Only need to keep track of allocated child tasks for explicit tasks since 47300b57cec5SDimitry Andric // implicit not deallocated 47310b57cec5SDimitry Andric if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) 47320b57cec5SDimitry Andric KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks); 47330b57cec5SDimitry Andric } 47340b57cec5SDimitry Andric 47350b57cec5SDimitry Andric KA_TRACE(20, 47360b57cec5SDimitry Andric ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", 47370b57cec5SDimitry Andric thread, taskdata, taskdata->td_parent)); 47380b57cec5SDimitry Andric #if OMPT_SUPPORT 47390b57cec5SDimitry Andric if (UNLIKELY(ompt_enabled.enabled)) 47400b57cec5SDimitry Andric __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid); 47410b57cec5SDimitry Andric #endif 47420b57cec5SDimitry Andric return task; 47430b57cec5SDimitry Andric } 47440b57cec5SDimitry Andric 47450b57cec5SDimitry Andric // Routine optionally generated by the compiler for setting the lastprivate flag 47460b57cec5SDimitry Andric // and calling needed constructors for private/firstprivate objects 47470b57cec5SDimitry Andric // (used to form taskloop tasks from pattern task) 47480b57cec5SDimitry Andric // Parameters: dest task, src task, lastprivate flag. 47490b57cec5SDimitry Andric typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); 47500b57cec5SDimitry Andric 47510b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8); 47520b57cec5SDimitry Andric 47530b57cec5SDimitry Andric // class to encapsulate manipulating loop bounds in a taskloop task. 47540b57cec5SDimitry Andric // this abstracts away the Intel vs GOMP taskloop interface for setting/getting 47550b57cec5SDimitry Andric // the loop bound variables. 
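// A minimal usage sketch (mirroring __kmp_taskloop_linear below; the
// variable names are illustrative only):
//
//   kmp_taskloop_bounds_t bounds(task, lb, ub); // lb/ub point into *task
//   kmp_uint64 lo = bounds.get_lb(); // reads Intel- or GOMP-style storage
//   bounds.set_ub(new_ub);           // writes back in the matching layout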
47560b57cec5SDimitry Andric class kmp_taskloop_bounds_t { 47570b57cec5SDimitry Andric kmp_task_t *task; 47580b57cec5SDimitry Andric const kmp_taskdata_t *taskdata; 47590b57cec5SDimitry Andric size_t lower_offset; 47600b57cec5SDimitry Andric size_t upper_offset; 47610b57cec5SDimitry Andric 47620b57cec5SDimitry Andric public: 47630b57cec5SDimitry Andric kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub) 47640b57cec5SDimitry Andric : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)), 47650b57cec5SDimitry Andric lower_offset((char *)lb - (char *)task), 47660b57cec5SDimitry Andric upper_offset((char *)ub - (char *)task) { 47670b57cec5SDimitry Andric KMP_DEBUG_ASSERT((char *)lb > (char *)_task); 47680b57cec5SDimitry Andric KMP_DEBUG_ASSERT((char *)ub > (char *)_task); 47690b57cec5SDimitry Andric } 47700b57cec5SDimitry Andric kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds) 47710b57cec5SDimitry Andric : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)), 47720b57cec5SDimitry Andric lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {} 47730b57cec5SDimitry Andric size_t get_lower_offset() const { return lower_offset; } 47740b57cec5SDimitry Andric size_t get_upper_offset() const { return upper_offset; } 47750b57cec5SDimitry Andric kmp_uint64 get_lb() const { 47760b57cec5SDimitry Andric kmp_int64 retval; 47770b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 47780b57cec5SDimitry Andric // Intel task just returns the lower bound normally 47790b57cec5SDimitry Andric if (!taskdata->td_flags.native) { 47800b57cec5SDimitry Andric retval = *(kmp_int64 *)((char *)task + lower_offset); 47810b57cec5SDimitry Andric } else { 47820b57cec5SDimitry Andric // GOMP task has to take into account the sizeof(long) 47830b57cec5SDimitry Andric if (taskdata->td_size_loop_bounds == 4) { 47840b57cec5SDimitry Andric kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds); 47850b57cec5SDimitry Andric retval = (kmp_int64)*lb; 47860b57cec5SDimitry Andric } else { 47870b57cec5SDimitry Andric kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds); 47880b57cec5SDimitry Andric retval = (kmp_int64)*lb; 47890b57cec5SDimitry Andric } 47900b57cec5SDimitry Andric } 47910b57cec5SDimitry Andric #else 4792fe6060f1SDimitry Andric (void)taskdata; 47930b57cec5SDimitry Andric retval = *(kmp_int64 *)((char *)task + lower_offset); 47940b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT) 47950b57cec5SDimitry Andric return retval; 47960b57cec5SDimitry Andric } 47970b57cec5SDimitry Andric kmp_uint64 get_ub() const { 47980b57cec5SDimitry Andric kmp_int64 retval; 47990b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 48000b57cec5SDimitry Andric // Intel task just returns the upper bound normally 48010b57cec5SDimitry Andric if (!taskdata->td_flags.native) { 48020b57cec5SDimitry Andric retval = *(kmp_int64 *)((char *)task + upper_offset); 48030b57cec5SDimitry Andric } else { 48040b57cec5SDimitry Andric // GOMP task has to take into account the sizeof(long) 48050b57cec5SDimitry Andric if (taskdata->td_size_loop_bounds == 4) { 48060b57cec5SDimitry Andric kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1; 48070b57cec5SDimitry Andric retval = (kmp_int64)*ub; 48080b57cec5SDimitry Andric } else { 48090b57cec5SDimitry Andric kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1; 48100b57cec5SDimitry Andric retval = (kmp_int64)*ub; 48110b57cec5SDimitry Andric } 48120b57cec5SDimitry Andric } 48130b57cec5SDimitry Andric #else 48140b57cec5SDimitry Andric retval = *(kmp_int64 *)((char *)task 
+ upper_offset); 48150b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT) 48160b57cec5SDimitry Andric return retval; 48170b57cec5SDimitry Andric } 48180b57cec5SDimitry Andric void set_lb(kmp_uint64 lb) { 48190b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 48200b57cec5SDimitry Andric // Intel task just sets the lower bound normally 48210b57cec5SDimitry Andric if (!taskdata->td_flags.native) { 48220b57cec5SDimitry Andric *(kmp_uint64 *)((char *)task + lower_offset) = lb; 48230b57cec5SDimitry Andric } else { 48240b57cec5SDimitry Andric // GOMP task has to take into account the sizeof(long) 48250b57cec5SDimitry Andric if (taskdata->td_size_loop_bounds == 4) { 48260b57cec5SDimitry Andric kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds); 48270b57cec5SDimitry Andric *lower = (kmp_uint32)lb; 48280b57cec5SDimitry Andric } else { 48290b57cec5SDimitry Andric kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds); 48300b57cec5SDimitry Andric *lower = (kmp_uint64)lb; 48310b57cec5SDimitry Andric } 48320b57cec5SDimitry Andric } 48330b57cec5SDimitry Andric #else 48340b57cec5SDimitry Andric *(kmp_uint64 *)((char *)task + lower_offset) = lb; 48350b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT) 48360b57cec5SDimitry Andric } 48370b57cec5SDimitry Andric void set_ub(kmp_uint64 ub) { 48380b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 48390b57cec5SDimitry Andric // Intel task just sets the upper bound normally 48400b57cec5SDimitry Andric if (!taskdata->td_flags.native) { 48410b57cec5SDimitry Andric *(kmp_uint64 *)((char *)task + upper_offset) = ub; 48420b57cec5SDimitry Andric } else { 48430b57cec5SDimitry Andric // GOMP task has to take into account the sizeof(long) 48440b57cec5SDimitry Andric if (taskdata->td_size_loop_bounds == 4) { 48450b57cec5SDimitry Andric kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1; 48460b57cec5SDimitry Andric *upper = (kmp_uint32)ub; 48470b57cec5SDimitry Andric } else { 48480b57cec5SDimitry Andric kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1; 48490b57cec5SDimitry Andric *upper = (kmp_uint64)ub; 48500b57cec5SDimitry Andric } 48510b57cec5SDimitry Andric } 48520b57cec5SDimitry Andric #else 48530b57cec5SDimitry Andric *(kmp_uint64 *)((char *)task + upper_offset) = ub; 48540b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT) 48550b57cec5SDimitry Andric } 48560b57cec5SDimitry Andric }; 48570b57cec5SDimitry Andric 48580b57cec5SDimitry Andric // __kmp_taskloop_linear: Start tasks of the taskloop linearly 48590b57cec5SDimitry Andric // 48600b57cec5SDimitry Andric // loc Source location information 48610b57cec5SDimitry Andric // gtid Global thread ID 48620b57cec5SDimitry Andric // task Pattern task, exposes the loop iteration range 48630b57cec5SDimitry Andric // lb Pointer to loop lower bound in task structure 48640b57cec5SDimitry Andric // ub Pointer to loop upper bound in task structure 48650b57cec5SDimitry Andric // st Loop stride 48660b57cec5SDimitry Andric // ub_glob Global upper bound (used for lastprivate check) 48670b57cec5SDimitry Andric // num_tasks Number of tasks to execute 48680b57cec5SDimitry Andric // grainsize Number of loop iterations per task 48690b57cec5SDimitry Andric // extras Number of chunks with grainsize+1 iterations 4870e8d8bef9SDimitry Andric // last_chunk Reduction of grainsize for last task 48710b57cec5SDimitry Andric // tc Iterations count 48720b57cec5SDimitry Andric // task_dup Tasks duplication routine 48730b57cec5SDimitry Andric // codeptr_ra Return address for OMPT events 48740b57cec5SDimitry 
Andric void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, 48750b57cec5SDimitry Andric kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, 48760b57cec5SDimitry Andric kmp_uint64 ub_glob, kmp_uint64 num_tasks, 48770b57cec5SDimitry Andric kmp_uint64 grainsize, kmp_uint64 extras, 4878e8d8bef9SDimitry Andric kmp_int64 last_chunk, kmp_uint64 tc, 48790b57cec5SDimitry Andric #if OMPT_SUPPORT 48800b57cec5SDimitry Andric void *codeptr_ra, 48810b57cec5SDimitry Andric #endif 48820b57cec5SDimitry Andric void *task_dup) { 48830b57cec5SDimitry Andric KMP_COUNT_BLOCK(OMP_TASKLOOP); 48840b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling); 48850b57cec5SDimitry Andric p_task_dup_t ptask_dup = (p_task_dup_t)task_dup; 48860b57cec5SDimitry Andric // compiler provides global bounds here 48870b57cec5SDimitry Andric kmp_taskloop_bounds_t task_bounds(task, lb, ub); 48880b57cec5SDimitry Andric kmp_uint64 lower = task_bounds.get_lb(); 48890b57cec5SDimitry Andric kmp_uint64 upper = task_bounds.get_ub(); 48900b57cec5SDimitry Andric kmp_uint64 i; 48910b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 48920b57cec5SDimitry Andric kmp_taskdata_t *current_task = thread->th.th_current_task; 48930b57cec5SDimitry Andric kmp_task_t *next_task; 48940b57cec5SDimitry Andric kmp_int32 lastpriv = 0; 48950b57cec5SDimitry Andric 4896fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + 4897fe6060f1SDimitry Andric (last_chunk < 0 ? last_chunk : extras)); 48980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > extras); 48990b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > 0); 49000b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, " 4901e8d8bef9SDimitry Andric "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n", 4902e8d8bef9SDimitry Andric gtid, num_tasks, grainsize, extras, last_chunk, lower, upper, 4903e8d8bef9SDimitry Andric ub_glob, st, task_dup)); 49040b57cec5SDimitry Andric 49050b57cec5SDimitry Andric // Launch num_tasks tasks, assign grainsize iterations each task 49060b57cec5SDimitry Andric for (i = 0; i < num_tasks; ++i) { 49070b57cec5SDimitry Andric kmp_uint64 chunk_minus_1; 49080b57cec5SDimitry Andric if (extras == 0) { 49090b57cec5SDimitry Andric chunk_minus_1 = grainsize - 1; 49100b57cec5SDimitry Andric } else { 49110b57cec5SDimitry Andric chunk_minus_1 = grainsize; 49120b57cec5SDimitry Andric --extras; // first extras iterations get bigger chunk (grainsize+1) 49130b57cec5SDimitry Andric } 49140b57cec5SDimitry Andric upper = lower + st * chunk_minus_1; 4915e8d8bef9SDimitry Andric if (upper > *ub) { 4916e8d8bef9SDimitry Andric upper = *ub; 4917e8d8bef9SDimitry Andric } 49180b57cec5SDimitry Andric if (i == num_tasks - 1) { 49190b57cec5SDimitry Andric // schedule the last task, set lastprivate flag if needed 49200b57cec5SDimitry Andric if (st == 1) { // most common case 49210b57cec5SDimitry Andric KMP_DEBUG_ASSERT(upper == *ub); 49220b57cec5SDimitry Andric if (upper == ub_glob) 49230b57cec5SDimitry Andric lastpriv = 1; 49240b57cec5SDimitry Andric } else if (st > 0) { // positive loop stride 49250b57cec5SDimitry Andric KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper); 49260b57cec5SDimitry Andric if ((kmp_uint64)st > ub_glob - upper) 49270b57cec5SDimitry Andric lastpriv = 1; 49280b57cec5SDimitry Andric } else { // negative loop stride 49290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(upper + st < *ub); 49300b57cec5SDimitry Andric if (upper - ub_glob < (kmp_uint64)(-st)) 49310b57cec5SDimitry Andric lastpriv = 1; 
49320b57cec5SDimitry Andric } 49330b57cec5SDimitry Andric } 493406c3fb27SDimitry Andric 493506c3fb27SDimitry Andric #if OMPX_TASKGRAPH 493606c3fb27SDimitry Andric next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0); 493706c3fb27SDimitry Andric #else 49380b57cec5SDimitry Andric next_task = __kmp_task_dup_alloc(thread, task); // allocate new task 493906c3fb27SDimitry Andric #endif 494006c3fb27SDimitry Andric 49410b57cec5SDimitry Andric kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task); 49420b57cec5SDimitry Andric kmp_taskloop_bounds_t next_task_bounds = 49430b57cec5SDimitry Andric kmp_taskloop_bounds_t(next_task, task_bounds); 49440b57cec5SDimitry Andric 49450b57cec5SDimitry Andric // adjust task-specific bounds 49460b57cec5SDimitry Andric next_task_bounds.set_lb(lower); 49470b57cec5SDimitry Andric if (next_taskdata->td_flags.native) { 49480b57cec5SDimitry Andric next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1)); 49490b57cec5SDimitry Andric } else { 49500b57cec5SDimitry Andric next_task_bounds.set_ub(upper); 49510b57cec5SDimitry Andric } 4952480093f4SDimitry Andric if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, 4953480093f4SDimitry Andric // etc. 49540b57cec5SDimitry Andric ptask_dup(next_task, task, lastpriv); 49550b57cec5SDimitry Andric KA_TRACE(40, 49560b57cec5SDimitry Andric ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, " 49570b57cec5SDimitry Andric "upper %lld stride %lld, (offsets %p %p)\n", 49580b57cec5SDimitry Andric gtid, i, next_task, lower, upper, st, 49590b57cec5SDimitry Andric next_task_bounds.get_lower_offset(), 49600b57cec5SDimitry Andric next_task_bounds.get_upper_offset())); 49610b57cec5SDimitry Andric #if OMPT_SUPPORT 49620b57cec5SDimitry Andric __kmp_omp_taskloop_task(NULL, gtid, next_task, 49630b57cec5SDimitry Andric codeptr_ra); // schedule new task 496481ad6265SDimitry Andric #if OMPT_OPTIONAL 496581ad6265SDimitry Andric if (ompt_enabled.ompt_callback_dispatch) { 496681ad6265SDimitry Andric OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk, 496781ad6265SDimitry Andric lower, upper, st); 496881ad6265SDimitry Andric } 496981ad6265SDimitry Andric #endif // OMPT_OPTIONAL 49700b57cec5SDimitry Andric #else 49710b57cec5SDimitry Andric __kmp_omp_task(gtid, next_task, true); // schedule new task 49720b57cec5SDimitry Andric #endif 49730b57cec5SDimitry Andric lower = upper + st; // adjust lower bound for the next iteration 49740b57cec5SDimitry Andric } 49750b57cec5SDimitry Andric // free the pattern task and exit 49760b57cec5SDimitry Andric __kmp_task_start(gtid, task, current_task); // make internal bookkeeping 49770b57cec5SDimitry Andric // do not execute the pattern task, just do internal bookkeeping 49780b57cec5SDimitry Andric __kmp_task_finish<false>(gtid, task, current_task); 49790b57cec5SDimitry Andric } 49800b57cec5SDimitry Andric 49810b57cec5SDimitry Andric // Structure to keep taskloop parameters for auxiliary task 49820b57cec5SDimitry Andric // kept in the shareds of the task structure. 
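// A hedged sketch of how these parameters travel (the real code is in
// __kmp_taskloop_recur below): the auxiliary task is allocated with a
// shareds block of sizeof(__taskloop_params_t) and the fields are filled in
// place:
//
//   __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
//   p->task = next_task;   // pattern task for the second half of the loop
//   p->num_tasks = n_tsk1; // tasks assigned to the second half
//   // ... the remaining fields mirror the taskloop arguments ...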
49830b57cec5SDimitry Andric typedef struct __taskloop_params { 49840b57cec5SDimitry Andric kmp_task_t *task; 49850b57cec5SDimitry Andric kmp_uint64 *lb; 49860b57cec5SDimitry Andric kmp_uint64 *ub; 49870b57cec5SDimitry Andric void *task_dup; 49880b57cec5SDimitry Andric kmp_int64 st; 49890b57cec5SDimitry Andric kmp_uint64 ub_glob; 49900b57cec5SDimitry Andric kmp_uint64 num_tasks; 49910b57cec5SDimitry Andric kmp_uint64 grainsize; 49920b57cec5SDimitry Andric kmp_uint64 extras; 4993e8d8bef9SDimitry Andric kmp_int64 last_chunk; 49940b57cec5SDimitry Andric kmp_uint64 tc; 49950b57cec5SDimitry Andric kmp_uint64 num_t_min; 49960b57cec5SDimitry Andric #if OMPT_SUPPORT 49970b57cec5SDimitry Andric void *codeptr_ra; 49980b57cec5SDimitry Andric #endif 49990b57cec5SDimitry Andric } __taskloop_params_t; 50000b57cec5SDimitry Andric 50010b57cec5SDimitry Andric void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *, 50020b57cec5SDimitry Andric kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64, 5003e8d8bef9SDimitry Andric kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64, 5004e8d8bef9SDimitry Andric kmp_uint64, 50050b57cec5SDimitry Andric #if OMPT_SUPPORT 50060b57cec5SDimitry Andric void *, 50070b57cec5SDimitry Andric #endif 50080b57cec5SDimitry Andric void *); 50090b57cec5SDimitry Andric 5010480093f4SDimitry Andric // Execute part of the taskloop submitted as a task. 50110b57cec5SDimitry Andric int __kmp_taskloop_task(int gtid, void *ptask) { 50120b57cec5SDimitry Andric __taskloop_params_t *p = 50130b57cec5SDimitry Andric (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds; 50140b57cec5SDimitry Andric kmp_task_t *task = p->task; 50150b57cec5SDimitry Andric kmp_uint64 *lb = p->lb; 50160b57cec5SDimitry Andric kmp_uint64 *ub = p->ub; 50170b57cec5SDimitry Andric void *task_dup = p->task_dup; 50180b57cec5SDimitry Andric // p_task_dup_t ptask_dup = (p_task_dup_t)task_dup; 50190b57cec5SDimitry Andric kmp_int64 st = p->st; 50200b57cec5SDimitry Andric kmp_uint64 ub_glob = p->ub_glob; 50210b57cec5SDimitry Andric kmp_uint64 num_tasks = p->num_tasks; 50220b57cec5SDimitry Andric kmp_uint64 grainsize = p->grainsize; 50230b57cec5SDimitry Andric kmp_uint64 extras = p->extras; 5024e8d8bef9SDimitry Andric kmp_int64 last_chunk = p->last_chunk; 50250b57cec5SDimitry Andric kmp_uint64 tc = p->tc; 50260b57cec5SDimitry Andric kmp_uint64 num_t_min = p->num_t_min; 50270b57cec5SDimitry Andric #if OMPT_SUPPORT 50280b57cec5SDimitry Andric void *codeptr_ra = p->codeptr_ra; 50290b57cec5SDimitry Andric #endif 50300b57cec5SDimitry Andric #if KMP_DEBUG 50310b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); 50320b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task != NULL); 5033e8d8bef9SDimitry Andric KA_TRACE(20, 5034e8d8bef9SDimitry Andric ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize" 5035e8d8bef9SDimitry Andric " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n", 5036e8d8bef9SDimitry Andric gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub, 5037e8d8bef9SDimitry Andric st, task_dup)); 50380b57cec5SDimitry Andric #endif 50390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min); 50400b57cec5SDimitry Andric if (num_tasks > num_t_min) 50410b57cec5SDimitry Andric __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks, 5042e8d8bef9SDimitry Andric grainsize, extras, last_chunk, tc, num_t_min, 50430b57cec5SDimitry Andric #if OMPT_SUPPORT 50440b57cec5SDimitry Andric codeptr_ra, 50450b57cec5SDimitry Andric #endif 50460b57cec5SDimitry Andric 
task_dup); 50470b57cec5SDimitry Andric else 50480b57cec5SDimitry Andric __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks, 5049e8d8bef9SDimitry Andric grainsize, extras, last_chunk, tc, 50500b57cec5SDimitry Andric #if OMPT_SUPPORT 50510b57cec5SDimitry Andric codeptr_ra, 50520b57cec5SDimitry Andric #endif 50530b57cec5SDimitry Andric task_dup); 50540b57cec5SDimitry Andric 50550b57cec5SDimitry Andric KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid)); 50560b57cec5SDimitry Andric return 0; 50570b57cec5SDimitry Andric } 50580b57cec5SDimitry Andric 5059480093f4SDimitry Andric // Schedule part of the taskloop as a task, 5060480093f4SDimitry Andric // execute the rest of the taskloop. 50610b57cec5SDimitry Andric // 50620b57cec5SDimitry Andric // loc Source location information 50630b57cec5SDimitry Andric // gtid Global thread ID 50640b57cec5SDimitry Andric // task Pattern task, exposes the loop iteration range 50650b57cec5SDimitry Andric // lb Pointer to loop lower bound in task structure 50660b57cec5SDimitry Andric // ub Pointer to loop upper bound in task structure 50670b57cec5SDimitry Andric // st Loop stride 50680b57cec5SDimitry Andric // ub_glob Global upper bound (used for lastprivate check) 50690b57cec5SDimitry Andric // num_tasks Number of tasks to execute 50700b57cec5SDimitry Andric // grainsize Number of loop iterations per task 50710b57cec5SDimitry Andric // extras Number of chunks with grainsize+1 iterations 5072e8d8bef9SDimitry Andric // last_chunk Reduction of grainsize for last task 50730b57cec5SDimitry Andric // tc Iterations count 50745ffd83dbSDimitry Andric // num_t_min Threshold to launch tasks recursively 50750b57cec5SDimitry Andric // task_dup Tasks duplication routine 50760b57cec5SDimitry Andric // codeptr_ra Return address for OMPT events 50770b57cec5SDimitry Andric void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task, 50780b57cec5SDimitry Andric kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, 50790b57cec5SDimitry Andric kmp_uint64 ub_glob, kmp_uint64 num_tasks, 50800b57cec5SDimitry Andric kmp_uint64 grainsize, kmp_uint64 extras, 5081e8d8bef9SDimitry Andric kmp_int64 last_chunk, kmp_uint64 tc, 5082e8d8bef9SDimitry Andric kmp_uint64 num_t_min, 50830b57cec5SDimitry Andric #if OMPT_SUPPORT 50840b57cec5SDimitry Andric void *codeptr_ra, 50850b57cec5SDimitry Andric #endif 50860b57cec5SDimitry Andric void *task_dup) { 50870b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); 50880b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task != NULL); 50890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > num_t_min); 5090e8d8bef9SDimitry Andric KA_TRACE(20, 5091e8d8bef9SDimitry Andric ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize" 5092e8d8bef9SDimitry Andric " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n", 5093e8d8bef9SDimitry Andric gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub, 5094e8d8bef9SDimitry Andric st, task_dup)); 50950b57cec5SDimitry Andric p_task_dup_t ptask_dup = (p_task_dup_t)task_dup; 50960b57cec5SDimitry Andric kmp_uint64 lower = *lb; 50970b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 50980b57cec5SDimitry Andric // kmp_taskdata_t *current_task = thread->th.th_current_task; 50990b57cec5SDimitry Andric kmp_task_t *next_task; 51000b57cec5SDimitry Andric size_t lower_offset = 51010b57cec5SDimitry Andric (char *)lb - (char *)task; // remember offset of lb in the task structure 51020b57cec5SDimitry Andric size_t upper_offset = 51030b57cec5SDimitry Andric 
(char *)ub - (char *)task; // remember offset of ub in the task structure 51040b57cec5SDimitry Andric 5105fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + 5106fe6060f1SDimitry Andric (last_chunk < 0 ? last_chunk : extras)); 51070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > extras); 51080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > 0); 51090b57cec5SDimitry Andric 51100b57cec5SDimitry Andric // split the loop in two halves 51110b57cec5SDimitry Andric kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1; 5112e8d8bef9SDimitry Andric kmp_int64 last_chunk0 = 0, last_chunk1 = 0; 51130b57cec5SDimitry Andric kmp_uint64 gr_size0 = grainsize; 51140b57cec5SDimitry Andric kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute 51150b57cec5SDimitry Andric kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task 5116e8d8bef9SDimitry Andric if (last_chunk < 0) { 5117e8d8bef9SDimitry Andric ext0 = ext1 = 0; 5118e8d8bef9SDimitry Andric last_chunk1 = last_chunk; 5119e8d8bef9SDimitry Andric tc0 = grainsize * n_tsk0; 5120e8d8bef9SDimitry Andric tc1 = tc - tc0; 5121e8d8bef9SDimitry Andric } else if (n_tsk0 <= extras) { 51220b57cec5SDimitry Andric gr_size0++; // integrate extras into grainsize 51230b57cec5SDimitry Andric ext0 = 0; // no extra iters in 1st half 51240b57cec5SDimitry Andric ext1 = extras - n_tsk0; // remaining extras 51250b57cec5SDimitry Andric tc0 = gr_size0 * n_tsk0; 51260b57cec5SDimitry Andric tc1 = tc - tc0; 51270b57cec5SDimitry Andric } else { // n_tsk0 > extras 51280b57cec5SDimitry Andric ext1 = 0; // no extra iters in 2nd half 51290b57cec5SDimitry Andric ext0 = extras; 51300b57cec5SDimitry Andric tc1 = grainsize * n_tsk1; 51310b57cec5SDimitry Andric tc0 = tc - tc1; 51320b57cec5SDimitry Andric } 51330b57cec5SDimitry Andric ub0 = lower + st * (tc0 - 1); 51340b57cec5SDimitry Andric lb1 = ub0 + st; 51350b57cec5SDimitry Andric 51360b57cec5SDimitry Andric // create pattern task for 2nd half of the loop 513706c3fb27SDimitry Andric #if OMPX_TASKGRAPH 513806c3fb27SDimitry Andric next_task = __kmp_task_dup_alloc(thread, task, 513906c3fb27SDimitry Andric /* taskloop_recur */ 1); 514006c3fb27SDimitry Andric #else 51410b57cec5SDimitry Andric next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task 514206c3fb27SDimitry Andric #endif 51430b57cec5SDimitry Andric // adjust lower bound (upper bound is not changed) for the 2nd half 51440b57cec5SDimitry Andric *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1; 5145480093f4SDimitry Andric if (ptask_dup != NULL) // construct firstprivates, etc. 
51460b57cec5SDimitry Andric ptask_dup(next_task, task, 0); 51470b57cec5SDimitry Andric *ub = ub0; // adjust upper bound for the 1st half 51480b57cec5SDimitry Andric 51490b57cec5SDimitry Andric // create auxiliary task for 2nd half of the loop 51505ffd83dbSDimitry Andric // make sure new task has same parent task as the pattern task 51515ffd83dbSDimitry Andric kmp_taskdata_t *current_task = thread->th.th_current_task; 51525ffd83dbSDimitry Andric thread->th.th_current_task = taskdata->td_parent; 51530b57cec5SDimitry Andric kmp_task_t *new_task = 51540b57cec5SDimitry Andric __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *), 51550b57cec5SDimitry Andric sizeof(__taskloop_params_t), &__kmp_taskloop_task); 51565ffd83dbSDimitry Andric // restore current task 51575ffd83dbSDimitry Andric thread->th.th_current_task = current_task; 51580b57cec5SDimitry Andric __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds; 51590b57cec5SDimitry Andric p->task = next_task; 51600b57cec5SDimitry Andric p->lb = (kmp_uint64 *)((char *)next_task + lower_offset); 51610b57cec5SDimitry Andric p->ub = (kmp_uint64 *)((char *)next_task + upper_offset); 51620b57cec5SDimitry Andric p->task_dup = task_dup; 51630b57cec5SDimitry Andric p->st = st; 51640b57cec5SDimitry Andric p->ub_glob = ub_glob; 51650b57cec5SDimitry Andric p->num_tasks = n_tsk1; 51660b57cec5SDimitry Andric p->grainsize = grainsize; 51670b57cec5SDimitry Andric p->extras = ext1; 5168e8d8bef9SDimitry Andric p->last_chunk = last_chunk1; 51690b57cec5SDimitry Andric p->tc = tc1; 51700b57cec5SDimitry Andric p->num_t_min = num_t_min; 51710b57cec5SDimitry Andric #if OMPT_SUPPORT 51720b57cec5SDimitry Andric p->codeptr_ra = codeptr_ra; 51730b57cec5SDimitry Andric #endif 51740b57cec5SDimitry Andric 517506c3fb27SDimitry Andric #if OMPX_TASKGRAPH 517606c3fb27SDimitry Andric kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task); 517706c3fb27SDimitry Andric new_task_data->tdg = taskdata->tdg; 517806c3fb27SDimitry Andric new_task_data->is_taskgraph = 0; 517906c3fb27SDimitry Andric #endif 518006c3fb27SDimitry Andric 51810b57cec5SDimitry Andric #if OMPT_SUPPORT 51820b57cec5SDimitry Andric // schedule new task with correct return address for OMPT events 51830b57cec5SDimitry Andric __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra); 51840b57cec5SDimitry Andric #else 51850b57cec5SDimitry Andric __kmp_omp_task(gtid, new_task, true); // schedule new task 51860b57cec5SDimitry Andric #endif 51870b57cec5SDimitry Andric 51880b57cec5SDimitry Andric // execute the 1st half of current subrange 51890b57cec5SDimitry Andric if (n_tsk0 > num_t_min) 51900b57cec5SDimitry Andric __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0, 5191e8d8bef9SDimitry Andric ext0, last_chunk0, tc0, num_t_min, 51920b57cec5SDimitry Andric #if OMPT_SUPPORT 51930b57cec5SDimitry Andric codeptr_ra, 51940b57cec5SDimitry Andric #endif 51950b57cec5SDimitry Andric task_dup); 51960b57cec5SDimitry Andric else 51970b57cec5SDimitry Andric __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, 5198e8d8bef9SDimitry Andric gr_size0, ext0, last_chunk0, tc0, 51990b57cec5SDimitry Andric #if OMPT_SUPPORT 52000b57cec5SDimitry Andric codeptr_ra, 52010b57cec5SDimitry Andric #endif 52020b57cec5SDimitry Andric task_dup); 52030b57cec5SDimitry Andric 5204e8d8bef9SDimitry Andric KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid)); 52050b57cec5SDimitry Andric } 52060b57cec5SDimitry Andric 5207e8d8bef9SDimitry Andric static void __kmp_taskloop(ident_t *loc, int gtid, 
kmp_task_t *task, int if_val, 5208e8d8bef9SDimitry Andric kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, 5209e8d8bef9SDimitry Andric int nogroup, int sched, kmp_uint64 grainsize, 5210e8d8bef9SDimitry Andric int modifier, void *task_dup) { 52110b57cec5SDimitry Andric kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); 52120b57cec5SDimitry Andric KMP_DEBUG_ASSERT(task != NULL); 52130b57cec5SDimitry Andric if (nogroup == 0) { 52140b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 52150b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 52160b57cec5SDimitry Andric #endif 52170b57cec5SDimitry Andric __kmpc_taskgroup(loc, gtid); 52180b57cec5SDimitry Andric } 52190b57cec5SDimitry Andric 522006c3fb27SDimitry Andric #if OMPX_TASKGRAPH 522106c3fb27SDimitry Andric KMP_ATOMIC_DEC(&__kmp_tdg_task_id); 522206c3fb27SDimitry Andric #endif 52230b57cec5SDimitry Andric // ========================================================================= 52240b57cec5SDimitry Andric // calculate loop parameters 52250b57cec5SDimitry Andric kmp_taskloop_bounds_t task_bounds(task, lb, ub); 52260b57cec5SDimitry Andric kmp_uint64 tc; 52270b57cec5SDimitry Andric // compiler provides global bounds here 52280b57cec5SDimitry Andric kmp_uint64 lower = task_bounds.get_lb(); 52290b57cec5SDimitry Andric kmp_uint64 upper = task_bounds.get_ub(); 52300b57cec5SDimitry Andric kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag 52310b57cec5SDimitry Andric kmp_uint64 num_tasks = 0, extras = 0; 5232e8d8bef9SDimitry Andric kmp_int64 last_chunk = 5233e8d8bef9SDimitry Andric 0; // reduce grainsize of last task by last_chunk in strict mode 52340b57cec5SDimitry Andric kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks; 52350b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 52360b57cec5SDimitry Andric kmp_taskdata_t *current_task = thread->th.th_current_task; 52370b57cec5SDimitry Andric 5238e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, " 5239e8d8bef9SDimitry Andric "grain %llu(%d, %d), dup %p\n", 5240e8d8bef9SDimitry Andric gtid, taskdata, lower, upper, st, grainsize, sched, modifier, 5241e8d8bef9SDimitry Andric task_dup)); 52420b57cec5SDimitry Andric 52430b57cec5SDimitry Andric // compute trip count 52440b57cec5SDimitry Andric if (st == 1) { // most common case 52450b57cec5SDimitry Andric tc = upper - lower + 1; 52460b57cec5SDimitry Andric } else if (st < 0) { 52470b57cec5SDimitry Andric tc = (lower - upper) / (-st) + 1; 52480b57cec5SDimitry Andric } else { // st > 0 52490b57cec5SDimitry Andric tc = (upper - lower) / st + 1; 52500b57cec5SDimitry Andric } 52510b57cec5SDimitry Andric if (tc == 0) { 5252e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid)); 52530b57cec5SDimitry Andric // free the pattern task and exit 52540b57cec5SDimitry Andric __kmp_task_start(gtid, task, current_task); 52550b57cec5SDimitry Andric // do not execute anything for zero-trip loop 52560b57cec5SDimitry Andric __kmp_task_finish<false>(gtid, task, current_task); 52570b57cec5SDimitry Andric return; 52580b57cec5SDimitry Andric } 52590b57cec5SDimitry Andric 52600b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 52610b57cec5SDimitry Andric ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 52620b57cec5SDimitry Andric ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 52630b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_work) { 52640b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_work)( 
52650b57cec5SDimitry Andric ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), 52660b57cec5SDimitry Andric &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); 52670b57cec5SDimitry Andric } 52680b57cec5SDimitry Andric #endif 52690b57cec5SDimitry Andric 52700b57cec5SDimitry Andric if (num_tasks_min == 0) 52710b57cec5SDimitry Andric // TODO: can we choose better default heuristic? 52720b57cec5SDimitry Andric num_tasks_min = 52730b57cec5SDimitry Andric KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE); 52740b57cec5SDimitry Andric 52750b57cec5SDimitry Andric // compute num_tasks/grainsize based on the input provided 52760b57cec5SDimitry Andric switch (sched) { 52770b57cec5SDimitry Andric case 0: // no schedule clause specified, we can choose the default 52780b57cec5SDimitry Andric // let's try to schedule (team_size*10) tasks 52790b57cec5SDimitry Andric grainsize = thread->th.th_team_nproc * 10; 52800b57cec5SDimitry Andric KMP_FALLTHROUGH(); 52810b57cec5SDimitry Andric case 2: // num_tasks provided 52820b57cec5SDimitry Andric if (grainsize > tc) { 52830b57cec5SDimitry Andric num_tasks = tc; // too big num_tasks requested, adjust values 52840b57cec5SDimitry Andric grainsize = 1; 52850b57cec5SDimitry Andric extras = 0; 52860b57cec5SDimitry Andric } else { 52870b57cec5SDimitry Andric num_tasks = grainsize; 52880b57cec5SDimitry Andric grainsize = tc / num_tasks; 52890b57cec5SDimitry Andric extras = tc % num_tasks; 52900b57cec5SDimitry Andric } 52910b57cec5SDimitry Andric break; 52920b57cec5SDimitry Andric case 1: // grainsize provided 52930b57cec5SDimitry Andric if (grainsize > tc) { 5294e8d8bef9SDimitry Andric num_tasks = 1; 5295e8d8bef9SDimitry Andric grainsize = tc; // too big grainsize requested, adjust values 5296e8d8bef9SDimitry Andric extras = 0; 5297e8d8bef9SDimitry Andric } else { 5298e8d8bef9SDimitry Andric if (modifier) { 5299e8d8bef9SDimitry Andric num_tasks = (tc + grainsize - 1) / grainsize; 5300e8d8bef9SDimitry Andric last_chunk = tc - (num_tasks * grainsize); 53010b57cec5SDimitry Andric extras = 0; 53020b57cec5SDimitry Andric } else { 53030b57cec5SDimitry Andric num_tasks = tc / grainsize; 53040b57cec5SDimitry Andric // adjust grainsize for balanced distribution of iterations 53050b57cec5SDimitry Andric grainsize = tc / num_tasks; 53060b57cec5SDimitry Andric extras = tc % num_tasks; 53070b57cec5SDimitry Andric } 5308e8d8bef9SDimitry Andric } 53090b57cec5SDimitry Andric break; 53100b57cec5SDimitry Andric default: 53110b57cec5SDimitry Andric KMP_ASSERT2(0, "unknown scheduling of taskloop"); 53120b57cec5SDimitry Andric } 5313e8d8bef9SDimitry Andric 5314fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + 5315fe6060f1SDimitry Andric (last_chunk < 0 ? 
last_chunk : extras)); 53160b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > extras); 53170b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_tasks > 0); 53180b57cec5SDimitry Andric // ========================================================================= 53190b57cec5SDimitry Andric 53200b57cec5SDimitry Andric // check if clause value first 53210b57cec5SDimitry Andric // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native) 53220b57cec5SDimitry Andric if (if_val == 0) { // if(0) specified, mark task as serial 53230b57cec5SDimitry Andric taskdata->td_flags.task_serial = 1; 53240b57cec5SDimitry Andric taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied 53250b57cec5SDimitry Andric // always start serial tasks linearly 53260b57cec5SDimitry Andric __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, 5327e8d8bef9SDimitry Andric grainsize, extras, last_chunk, tc, 53280b57cec5SDimitry Andric #if OMPT_SUPPORT 53290b57cec5SDimitry Andric OMPT_GET_RETURN_ADDRESS(0), 53300b57cec5SDimitry Andric #endif 53310b57cec5SDimitry Andric task_dup); 53320b57cec5SDimitry Andric // !taskdata->td_flags.native => currently force linear spawning of tasks 53330b57cec5SDimitry Andric // for GOMP_taskloop 53340b57cec5SDimitry Andric } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) { 5335e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" 5336e8d8bef9SDimitry Andric "(%lld), grain %llu, extras %llu, last_chunk %lld\n", 5337e8d8bef9SDimitry Andric gtid, tc, num_tasks, num_tasks_min, grainsize, extras, 5338e8d8bef9SDimitry Andric last_chunk)); 53390b57cec5SDimitry Andric __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, 5340e8d8bef9SDimitry Andric grainsize, extras, last_chunk, tc, num_tasks_min, 53410b57cec5SDimitry Andric #if OMPT_SUPPORT 53420b57cec5SDimitry Andric OMPT_GET_RETURN_ADDRESS(0), 53430b57cec5SDimitry Andric #endif 53440b57cec5SDimitry Andric task_dup); 53450b57cec5SDimitry Andric } else { 5346e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu" 5347e8d8bef9SDimitry Andric "(%lld), grain %llu, extras %llu, last_chunk %lld\n", 5348e8d8bef9SDimitry Andric gtid, tc, num_tasks, num_tasks_min, grainsize, extras, 5349e8d8bef9SDimitry Andric last_chunk)); 53500b57cec5SDimitry Andric __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, 5351e8d8bef9SDimitry Andric grainsize, extras, last_chunk, tc, 53520b57cec5SDimitry Andric #if OMPT_SUPPORT 53530b57cec5SDimitry Andric OMPT_GET_RETURN_ADDRESS(0), 53540b57cec5SDimitry Andric #endif 53550b57cec5SDimitry Andric task_dup); 53560b57cec5SDimitry Andric } 53570b57cec5SDimitry Andric 53580b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 53590b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_work) { 53600b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_work)( 53610b57cec5SDimitry Andric ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), 53620b57cec5SDimitry Andric &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); 53630b57cec5SDimitry Andric } 53640b57cec5SDimitry Andric #endif 53650b57cec5SDimitry Andric 53660b57cec5SDimitry Andric if (nogroup == 0) { 53670b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL 53680b57cec5SDimitry Andric OMPT_STORE_RETURN_ADDRESS(gtid); 53690b57cec5SDimitry Andric #endif 53700b57cec5SDimitry Andric __kmpc_end_taskgroup(loc, gtid); 53710b57cec5SDimitry Andric } 5372e8d8bef9SDimitry 
Andric KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid)); 5373e8d8bef9SDimitry Andric } 5374e8d8bef9SDimitry Andric 5375e8d8bef9SDimitry Andric /*! 5376e8d8bef9SDimitry Andric @ingroup TASKING 5377e8d8bef9SDimitry Andric @param loc Source location information 5378e8d8bef9SDimitry Andric @param gtid Global thread ID 5379e8d8bef9SDimitry Andric @param task Task structure 5380e8d8bef9SDimitry Andric @param if_val Value of the if clause 5381e8d8bef9SDimitry Andric @param lb Pointer to loop lower bound in task structure 5382e8d8bef9SDimitry Andric @param ub Pointer to loop upper bound in task structure 5383e8d8bef9SDimitry Andric @param st Loop stride 5384e8d8bef9SDimitry Andric @param nogroup Flag, 1 if nogroup clause specified, 0 otherwise 5385e8d8bef9SDimitry Andric @param sched Schedule specified 0/1/2 for none/grainsize/num_tasks 5386e8d8bef9SDimitry Andric @param grainsize Schedule value if specified 5387e8d8bef9SDimitry Andric @param task_dup Tasks duplication routine 5388e8d8bef9SDimitry Andric 5389e8d8bef9SDimitry Andric Execute the taskloop construct. 5390e8d8bef9SDimitry Andric */ 5391e8d8bef9SDimitry Andric void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, 5392e8d8bef9SDimitry Andric kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, 5393e8d8bef9SDimitry Andric int sched, kmp_uint64 grainsize, void *task_dup) { 5394e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid); 5395e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid)); 5396e8d8bef9SDimitry Andric __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, 5397e8d8bef9SDimitry Andric 0, task_dup); 53980b57cec5SDimitry Andric KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); 53990b57cec5SDimitry Andric } 5400e8d8bef9SDimitry Andric 5401e8d8bef9SDimitry Andric /*! 5402e8d8bef9SDimitry Andric @ingroup TASKING 5403e8d8bef9SDimitry Andric @param loc Source location information 5404e8d8bef9SDimitry Andric @param gtid Global thread ID 5405e8d8bef9SDimitry Andric @param task Task structure 5406e8d8bef9SDimitry Andric @param if_val Value of the if clause 5407e8d8bef9SDimitry Andric @param lb Pointer to loop lower bound in task structure 5408e8d8bef9SDimitry Andric @param ub Pointer to loop upper bound in task structure 5409e8d8bef9SDimitry Andric @param st Loop stride 5410e8d8bef9SDimitry Andric @param nogroup Flag, 1 if nogroup clause specified, 0 otherwise 5411e8d8bef9SDimitry Andric @param sched Schedule specified 0/1/2 for none/grainsize/num_tasks 5412e8d8bef9SDimitry Andric @param grainsize Schedule value if specified 541381ad6265SDimitry Andric @param modifier Modifier 'strict' for sched, 1 if present, 0 otherwise 5414e8d8bef9SDimitry Andric @param task_dup Tasks duplication routine 5415e8d8bef9SDimitry Andric 5416e8d8bef9SDimitry Andric Execute the taskloop construct. 
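A worked example of the scheduling arithmetic in the dispatch code above
(numbers illustrative): with sched=1 (grainsize specified), tc=10 iterations
and grainsize=4, the strict modifier gives num_tasks = (10 + 4 - 1) / 4 = 3
and last_chunk = 10 - 3 * 4 = -2, so the last task runs
grainsize + last_chunk = 2 iterations. Without the modifier the same input
gives num_tasks = 10 / 4 = 2, after which grainsize is rebalanced to
10 / 2 = 5 with extras = 0.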
5417e8d8bef9SDimitry Andric */
5418e8d8bef9SDimitry Andric void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5419e8d8bef9SDimitry Andric kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
5420e8d8bef9SDimitry Andric int nogroup, int sched, kmp_uint64 grainsize,
5421e8d8bef9SDimitry Andric int modifier, void *task_dup) {
5422e8d8bef9SDimitry Andric __kmp_assert_valid_gtid(gtid);
5423e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
5424e8d8bef9SDimitry Andric __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5425e8d8bef9SDimitry Andric modifier, task_dup);
5426e8d8bef9SDimitry Andric KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
5427e8d8bef9SDimitry Andric }
5428bdd1243dSDimitry Andric
5429bdd1243dSDimitry Andric /*!
5430bdd1243dSDimitry Andric @ingroup TASKING
5431bdd1243dSDimitry Andric @param gtid Global Thread ID of current thread
5432bdd1243dSDimitry Andric @return Returns a pointer to the thread's current task async handle. If no task
5433bdd1243dSDimitry Andric is present or gtid is invalid, returns NULL.
5434bdd1243dSDimitry Andric
5435bdd1243dSDimitry Andric Acquires a pointer to the target async handle from the current task.
5436bdd1243dSDimitry Andric */
5437bdd1243dSDimitry Andric void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
5438bdd1243dSDimitry Andric if (gtid == KMP_GTID_DNE)
5439bdd1243dSDimitry Andric return NULL;
5440bdd1243dSDimitry Andric
5441bdd1243dSDimitry Andric kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5442bdd1243dSDimitry Andric kmp_taskdata_t *taskdata = thread->th.th_current_task;
5443bdd1243dSDimitry Andric
5444bdd1243dSDimitry Andric if (!taskdata)
5445bdd1243dSDimitry Andric return NULL;
5446bdd1243dSDimitry Andric
5447bdd1243dSDimitry Andric return &taskdata->td_target_data.async_handle;
5448bdd1243dSDimitry Andric }
5449bdd1243dSDimitry Andric
5450bdd1243dSDimitry Andric /*!
5451bdd1243dSDimitry Andric @ingroup TASKING
5452bdd1243dSDimitry Andric @param gtid Global Thread ID of current thread
5453bdd1243dSDimitry Andric @return Returns TRUE if the current task being executed by the given thread has
5454bdd1243dSDimitry Andric a task team allocated to it. Otherwise, returns FALSE.
5455bdd1243dSDimitry Andric
5456bdd1243dSDimitry Andric Checks if the current thread has a task team.
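A hedged illustration of calling the two entry points above together; the
pairing is illustrative only, not prescribed by this file:

@code
if (__kmpc_omp_has_task_team(gtid)) {
  void **handle = __kmpc_omp_get_target_async_handle_ptr(gtid);
  // handle is NULL if gtid is invalid or no current task is present
}
@endcode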
5457bdd1243dSDimitry Andric */
5458bdd1243dSDimitry Andric bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
5459bdd1243dSDimitry Andric if (gtid == KMP_GTID_DNE)
5460bdd1243dSDimitry Andric return FALSE;
5461bdd1243dSDimitry Andric
5462bdd1243dSDimitry Andric kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5463bdd1243dSDimitry Andric kmp_taskdata_t *taskdata = thread->th.th_current_task;
5464bdd1243dSDimitry Andric
5465bdd1243dSDimitry Andric if (!taskdata)
5466bdd1243dSDimitry Andric return FALSE;
5467bdd1243dSDimitry Andric
5468bdd1243dSDimitry Andric return taskdata->td_task_team != NULL;
5469bdd1243dSDimitry Andric }
547006c3fb27SDimitry Andric
547106c3fb27SDimitry Andric #if OMPX_TASKGRAPH
547206c3fb27SDimitry Andric // __kmp_find_tdg: identify a TDG through its ID
547306c3fb27SDimitry Andric // tdg_id: ID of the TDG
547406c3fb27SDimitry Andric // returns: if a TDG corresponding to this ID is found and is not in
547506c3fb27SDimitry Andric // its initial state, a pointer to it; otherwise nullptr
547706c3fb27SDimitry Andric static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
547806c3fb27SDimitry Andric kmp_tdg_info_t *res = nullptr;
547906c3fb27SDimitry Andric if (__kmp_max_tdgs == 0)
548006c3fb27SDimitry Andric return res;
548106c3fb27SDimitry Andric
548206c3fb27SDimitry Andric if (__kmp_global_tdgs == NULL)
548306c3fb27SDimitry Andric __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
548406c3fb27SDimitry Andric sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
548506c3fb27SDimitry Andric
548606c3fb27SDimitry Andric if ((__kmp_global_tdgs[tdg_id]) &&
548706c3fb27SDimitry Andric (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
548806c3fb27SDimitry Andric res = __kmp_global_tdgs[tdg_id];
548906c3fb27SDimitry Andric return res;
549006c3fb27SDimitry Andric }
549106c3fb27SDimitry Andric
549206c3fb27SDimitry Andric // __kmp_print_tdg_dot: prints the TDG to a dot file
549306c3fb27SDimitry Andric // tdg: Pointer to the TDG
549406c3fb27SDimitry Andric void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg) {
549506c3fb27SDimitry Andric kmp_int32 tdg_id = tdg->tdg_id;
549606c3fb27SDimitry Andric KA_TRACE(10, ("__kmp_print_tdg_dot(enter): tdg_id=%d \n", tdg_id));
549706c3fb27SDimitry Andric
549806c3fb27SDimitry Andric char file_name[20];
549906c3fb27SDimitry Andric sprintf(file_name, "tdg_%d.dot", tdg_id);
550006c3fb27SDimitry Andric kmp_safe_raii_file_t tdg_file(file_name, "w");
550106c3fb27SDimitry Andric
550206c3fb27SDimitry Andric kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
550306c3fb27SDimitry Andric fprintf(tdg_file,
550406c3fb27SDimitry Andric "digraph TDG {\n"
550506c3fb27SDimitry Andric " compound=true\n"
550606c3fb27SDimitry Andric " subgraph cluster {\n"
550706c3fb27SDimitry Andric " label=TDG_%d\n",
550806c3fb27SDimitry Andric tdg_id);
550906c3fb27SDimitry Andric for (kmp_int32 i = 0; i < num_tasks; i++) {
551006c3fb27SDimitry Andric fprintf(tdg_file, " %d[style=bold]\n", i);
551106c3fb27SDimitry Andric }
551206c3fb27SDimitry Andric fprintf(tdg_file, " }\n");
551306c3fb27SDimitry Andric for (kmp_int32 i = 0; i < num_tasks; i++) {
551406c3fb27SDimitry Andric kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
551506c3fb27SDimitry Andric kmp_int32 *successors = tdg->record_map[i].successors;
551606c3fb27SDimitry Andric if (nsuccessors > 0) {
551706c3fb27SDimitry Andric for (kmp_int32 j = 0; j < nsuccessors; j++)
551806c3fb27SDimitry Andric fprintf(tdg_file, " %d -> %d \n", i, successors[j]);
    }
  }
  fprintf(tdg_file, "}");
  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n",
                __kmp_get_gtid(), tdg_id));
}

// __kmp_exec_tdg: launch the execution of a previously recorded TDG
// gtid: Global Thread ID
// tdg: Pointer to the TDG to execute
void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
  KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 *this_root_tasks = tdg->root_tasks;
  kmp_int32 this_num_roots = tdg->num_roots;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *parent_task = thread->th.th_current_task;

  if (tdg->rec_taskred_data) {
    __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
  }

  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);

    td->td_parent = parent_task;
    this_record_map[j].parent_task = parent_task;

    kmp_taskgroup_t *parent_taskgroup =
        this_record_map[j].parent_task->td_taskgroup;

    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
                      this_record_map[j].npredecessors);
    KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);

    if (parent_taskgroup) {
      KMP_ATOMIC_INC(&parent_taskgroup->count);
      // The taskgroup is different so we must update it
      td->td_taskgroup = parent_taskgroup;
    } else if (td->td_taskgroup != nullptr) {
      // If the parent doesn't have a taskgroup, remove it from the task
      td->td_taskgroup = nullptr;
    }
    if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
  }

  for (kmp_int32 j = 0; j < this_num_roots; ++j) {
    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
  }
  KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
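  // Note: only the root tasks are enqueued above. Every other recorded node
  // becomes ready during replay when its npredecessors_counter, reset to the
  // recorded in-degree in the first loop, is decremented to zero as its
  // predecessors complete.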
}

// __kmp_start_record: set up a TDG structure and set the
// recording flag to true
// gtid: Global Thread ID of the encountering thread
// flags: Flags associated with the TDG
// tdg_id: ID of the TDG to record
static inline void __kmp_start_record(kmp_int32 gtid,
                                      kmp_taskgraph_flags_t *flags,
                                      kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg =
      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
  // Initializing the TDG structure
  tdg->tdg_id = tdg_id;
  tdg->map_size = INIT_MAPSIZE;
  tdg->num_roots = -1;
  tdg->root_tasks = nullptr;
  tdg->tdg_status = KMP_TDG_RECORDING;
  tdg->rec_num_taskred = 0;
  tdg->rec_taskred_data = nullptr;
  KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);

  // Initializing the list of nodes in this TDG
  kmp_node_info_t *this_record_map =
      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
  for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
    kmp_int32 *successorsList =
        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
    this_record_map[i].task = nullptr;
    this_record_map[i].successors = successorsList;
    this_record_map[i].nsuccessors = 0;
    this_record_map[i].npredecessors = 0;
    this_record_map[i].successors_size = __kmp_successors_size;
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
  }

  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
}

// __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
// the beginning of the record process of a task region
// loc_ref: Location of TDG, not used yet
// gtid: Global Thread ID of the encountering thread
// input_flags: Flags associated with the TDG
// tdg_id: ID of the TDG to record; for now, an incrementing integer
// returns: 1 if the encountering thread must execute (and record) the task
// region, 0 if a previously recorded TDG was replayed instead
// See the illustrative usage sketch at the end of this OMPX_TASKGRAPH block.
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
                                   kmp_int32 input_flags, kmp_int32 tdg_id) {

  kmp_int32 res;
  kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
  KA_TRACE(10,
           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
            gtid, loc_ref, input_flags, tdg_id));

  if (__kmp_max_tdgs == 0) {
    KA_TRACE(
        10,
        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
         "__kmp_max_tdgs = 0\n",
         gtid, loc_ref, input_flags, tdg_id));
    return 1;
  }

  __kmpc_taskgroup(loc_ref, gtid);
  if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
    // TODO: use re_record flag
    __kmp_exec_tdg(gtid, tdg);
    res = 0;
  } else {
    __kmp_curr_tdg_idx = tdg_id;
    KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
    __kmp_start_record(gtid, flags, tdg_id);
    __kmp_num_tdg++;
    res = 1;
  }
  KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
                gtid, tdg_id, res ? "record" : "execute"));
  return res;
}

// __kmp_end_record: finalize a TDG after recording it
// gtid: Global thread ID
// tdg: Pointer to the TDG
void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  // Store roots
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  kmp_int32 *this_root_tasks =
      (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
  kmp_int32 this_map_size = tdg->map_size;
  kmp_int32 this_num_roots = 0;
  kmp_info_t *thread = __kmp_threads[gtid];

  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    if (this_record_map[i].npredecessors == 0) {
      this_root_tasks[this_num_roots++] = i;
    }
  }

  // Update with roots info and mapsize
  tdg->map_size = this_map_size;
  tdg->num_roots = this_num_roots;
  tdg->root_tasks = this_root_tasks;
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
  tdg->tdg_status = KMP_TDG_READY;

  if (thread->th.th_current_task->td_dephash) {
    __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
    thread->th.th_current_task->td_dephash = NULL;
  }

  // Reset predecessor counters
  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
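    // Restore each node's dynamic counter to its recorded in-degree so the
    // graph can be replayed; this is the counter that __kmp_exec_tdg relies
    // on being decremented as predecessors finish.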
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
                      this_record_map[i].npredecessors);
  }
  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);

  if (__kmp_tdg_dot)
    __kmp_print_tdg_dot(tdg);
}

// __kmpc_end_record_task: wrapper around __kmp_end_record to mark
// the end of the recording phase
//
// loc_ref: Source location information
// gtid: Global thread ID
// input_flags: Flags attached to the graph
// tdg_id: ID of the TDG that just finished recording
void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
                            kmp_int32 input_flags, kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);

  KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
                " tdg=%d with flags=%d\n",
                gtid, loc_ref, tdg_id, input_flags));
  if (__kmp_max_tdgs) {
    // TODO: use input_flags->nowait
    __kmpc_end_taskgroup(loc_ref, gtid);
    // Guard against a TDG that was never found or never started recording
    if (tdg && __kmp_tdg_is_recording(tdg->tdg_status))
      __kmp_end_record(gtid, tdg);
  }
  KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
                " tdg=%d, its status is now READY\n",
                gtid, loc_ref, tdg_id));
}
#endif
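
// A minimal sketch (not part of the runtime; emit_region_tasks and the
// constant tdg_id/input_flags values are illustrative assumptions) of how
// generated code might drive the record-and-replay entry points above:
//
//   if (__kmpc_start_record_task(loc, gtid, /*input_flags=*/0, /*tdg_id=*/0)) {
//     // First encounter (or TDGs disabled): run the region; when recording
//     // is enabled its tasks are captured into TDG 0 as they execute.
//     emit_region_tasks(loc, gtid); // hypothetical helper
//   } // Otherwise the recorded TDG was replayed and the body is skipped.
//   __kmpc_end_record_task(loc, gtid, /*input_flags=*/0, /*tdg_id=*/0);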