xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_tasking.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "kmp.h"
140b57cec5SDimitry Andric #include "kmp_i18n.h"
150b57cec5SDimitry Andric #include "kmp_itt.h"
160b57cec5SDimitry Andric #include "kmp_stats.h"
170b57cec5SDimitry Andric #include "kmp_wait_release.h"
180b57cec5SDimitry Andric #include "kmp_taskdeps.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #if OMPT_SUPPORT
210b57cec5SDimitry Andric #include "ompt-specific.h"
220b57cec5SDimitry Andric #endif
230b57cec5SDimitry Andric 
24bdd1243dSDimitry Andric #if ENABLE_LIBOMPTARGET
// Function pointer resolved at runtime; points at libomptarget's
// __tgt_target_nowait_query entry if that symbol can be found, NULL otherwise.
static void (*tgt_target_nowait_query)(void **);

// __kmp_init_target_task: resolve the libomptarget query hook via KMP_DLSYM so
// the host runtime can poll asynchronous (nowait) target tasks without a hard
// link-time dependency on libomptarget.
void __kmp_init_target_task() {
  *(void **)(&tgt_target_nowait_query) = KMP_DLSYM("__tgt_target_nowait_query");
}
30bdd1243dSDimitry Andric #endif
31bdd1243dSDimitry Andric 
320b57cec5SDimitry Andric /* forward declaration */
330b57cec5SDimitry Andric static void __kmp_enable_tasking(kmp_task_team_t *task_team,
340b57cec5SDimitry Andric                                  kmp_info_t *this_thr);
350b57cec5SDimitry Andric static void __kmp_alloc_task_deque(kmp_info_t *thread,
360b57cec5SDimitry Andric                                    kmp_thread_data_t *thread_data);
370b57cec5SDimitry Andric static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
380b57cec5SDimitry Andric                                            kmp_task_team_t *task_team);
390b57cec5SDimitry Andric static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
4006c3fb27SDimitry Andric #if OMPX_TASKGRAPH
4106c3fb27SDimitry Andric static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
4206c3fb27SDimitry Andric int __kmp_taskloop_task(int gtid, void *ptask);
4306c3fb27SDimitry Andric #endif
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric //  __kmp_trace_task_stack: print the tied tasks from the task stack in order
480b57cec5SDimitry Andric //  from top do bottom
490b57cec5SDimitry Andric //
500b57cec5SDimitry Andric //  gtid: global thread identifier for thread containing stack
510b57cec5SDimitry Andric //  thread_data: thread data for task team thread containing stack
520b57cec5SDimitry Andric //  threshold: value above which the trace statement triggers
530b57cec5SDimitry Andric //  location: string identifying call site of this function (for trace)
540b57cec5SDimitry Andric static void __kmp_trace_task_stack(kmp_int32 gtid,
550b57cec5SDimitry Andric                                    kmp_thread_data_t *thread_data,
560b57cec5SDimitry Andric                                    int threshold, char *location) {
570b57cec5SDimitry Andric   kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
580b57cec5SDimitry Andric   kmp_taskdata_t **stack_top = task_stack->ts_top;
590b57cec5SDimitry Andric   kmp_int32 entries = task_stack->ts_entries;
600b57cec5SDimitry Andric   kmp_taskdata_t *tied_task;
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric   KA_TRACE(
630b57cec5SDimitry Andric       threshold,
640b57cec5SDimitry Andric       ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
650b57cec5SDimitry Andric        "first_block = %p, stack_top = %p \n",
660b57cec5SDimitry Andric        location, gtid, entries, task_stack->ts_first_block, stack_top));
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(stack_top != NULL);
690b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(entries > 0);
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   while (entries != 0) {
720b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
730b57cec5SDimitry Andric     // fix up ts_top if we need to pop from previous block
740b57cec5SDimitry Andric     if (entries & TASK_STACK_INDEX_MASK == 0) {
750b57cec5SDimitry Andric       kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric       stack_block = stack_block->sb_prev;
780b57cec5SDimitry Andric       stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
790b57cec5SDimitry Andric     }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric     // finish bookkeeping
820b57cec5SDimitry Andric     stack_top--;
830b57cec5SDimitry Andric     entries--;
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric     tied_task = *stack_top;
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(tied_task != NULL);
880b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric     KA_TRACE(threshold,
910b57cec5SDimitry Andric              ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
920b57cec5SDimitry Andric               "stack_top=%p, tied_task=%p\n",
930b57cec5SDimitry Andric               location, gtid, entries, stack_top, tied_task));
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   KA_TRACE(threshold,
980b57cec5SDimitry Andric            ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
990b57cec5SDimitry Andric             location, gtid));
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric //  __kmp_init_task_stack: initialize the task stack for the first time
1030b57cec5SDimitry Andric //  after a thread_data structure is created.
1040b57cec5SDimitry Andric //  It should not be necessary to do this again (assuming the stack works).
1050b57cec5SDimitry Andric //
1060b57cec5SDimitry Andric //  gtid: global thread identifier of calling thread
1070b57cec5SDimitry Andric //  thread_data: thread data for task team thread containing stack
1080b57cec5SDimitry Andric static void __kmp_init_task_stack(kmp_int32 gtid,
1090b57cec5SDimitry Andric                                   kmp_thread_data_t *thread_data) {
1100b57cec5SDimitry Andric   kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
1110b57cec5SDimitry Andric   kmp_stack_block_t *first_block;
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric   // set up the first block of the stack
1140b57cec5SDimitry Andric   first_block = &task_stack->ts_first_block;
1150b57cec5SDimitry Andric   task_stack->ts_top = (kmp_taskdata_t **)first_block;
1160b57cec5SDimitry Andric   memset((void *)first_block, '\0',
1170b57cec5SDimitry Andric          TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   // initialize the stack to be empty
1200b57cec5SDimitry Andric   task_stack->ts_entries = TASK_STACK_EMPTY;
1210b57cec5SDimitry Andric   first_block->sb_next = NULL;
1220b57cec5SDimitry Andric   first_block->sb_prev = NULL;
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric //  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
1260b57cec5SDimitry Andric //
1270b57cec5SDimitry Andric //  gtid: global thread identifier for calling thread
1280b57cec5SDimitry Andric //  thread_data: thread info for thread containing stack
1290b57cec5SDimitry Andric static void __kmp_free_task_stack(kmp_int32 gtid,
1300b57cec5SDimitry Andric                                   kmp_thread_data_t *thread_data) {
1310b57cec5SDimitry Andric   kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
1320b57cec5SDimitry Andric   kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
1350b57cec5SDimitry Andric   // free from the second block of the stack
1360b57cec5SDimitry Andric   while (stack_block != NULL) {
1370b57cec5SDimitry Andric     kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric     stack_block->sb_next = NULL;
1400b57cec5SDimitry Andric     stack_block->sb_prev = NULL;
1410b57cec5SDimitry Andric     if (stack_block != &task_stack->ts_first_block) {
1420b57cec5SDimitry Andric       __kmp_thread_free(thread,
1430b57cec5SDimitry Andric                         stack_block); // free the block, if not the first
1440b57cec5SDimitry Andric     }
1450b57cec5SDimitry Andric     stack_block = next_block;
1460b57cec5SDimitry Andric   }
1470b57cec5SDimitry Andric   // initialize the stack to be empty
1480b57cec5SDimitry Andric   task_stack->ts_entries = 0;
1490b57cec5SDimitry Andric   task_stack->ts_top = NULL;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric //  __kmp_push_task_stack: Push the tied task onto the task stack.
1530b57cec5SDimitry Andric //     Grow the stack if necessary by allocating another block.
1540b57cec5SDimitry Andric //
1550b57cec5SDimitry Andric //  gtid: global thread identifier for calling thread
1560b57cec5SDimitry Andric //  thread: thread info for thread containing stack
1570b57cec5SDimitry Andric //  tied_task: the task to push on the stack
1580b57cec5SDimitry Andric static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
1590b57cec5SDimitry Andric                                   kmp_taskdata_t *tied_task) {
1600b57cec5SDimitry Andric   // GEH - need to consider what to do if tt_threads_data not allocated yet
1610b57cec5SDimitry Andric   kmp_thread_data_t *thread_data =
1620b57cec5SDimitry Andric       &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
1630b57cec5SDimitry Andric   kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric   if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
1660b57cec5SDimitry Andric     return; // Don't push anything on stack if team or team tasks are serialized
1670b57cec5SDimitry Andric   }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
1700b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   KA_TRACE(20,
1730b57cec5SDimitry Andric            ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
1740b57cec5SDimitry Andric             gtid, thread, tied_task));
1750b57cec5SDimitry Andric   // Store entry
1760b57cec5SDimitry Andric   *(task_stack->ts_top) = tied_task;
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric   // Do bookkeeping for next push
1790b57cec5SDimitry Andric   task_stack->ts_top++;
1800b57cec5SDimitry Andric   task_stack->ts_entries++;
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric   if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
1830b57cec5SDimitry Andric     // Find beginning of this task block
1840b57cec5SDimitry Andric     kmp_stack_block_t *stack_block =
1850b57cec5SDimitry Andric         (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric     // Check if we already have a block
1880b57cec5SDimitry Andric     if (stack_block->sb_next !=
1890b57cec5SDimitry Andric         NULL) { // reset ts_top to beginning of next block
1900b57cec5SDimitry Andric       task_stack->ts_top = &stack_block->sb_next->sb_block[0];
1910b57cec5SDimitry Andric     } else { // Alloc new block and link it up
1920b57cec5SDimitry Andric       kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
1930b57cec5SDimitry Andric           thread, sizeof(kmp_stack_block_t));
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric       task_stack->ts_top = &new_block->sb_block[0];
1960b57cec5SDimitry Andric       stack_block->sb_next = new_block;
1970b57cec5SDimitry Andric       new_block->sb_prev = stack_block;
1980b57cec5SDimitry Andric       new_block->sb_next = NULL;
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric       KA_TRACE(
2010b57cec5SDimitry Andric           30,
2020b57cec5SDimitry Andric           ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
2030b57cec5SDimitry Andric            gtid, tied_task, new_block));
2040b57cec5SDimitry Andric     }
2050b57cec5SDimitry Andric   }
2060b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
2070b57cec5SDimitry Andric                 tied_task));
2080b57cec5SDimitry Andric }
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric //  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
2110b57cec5SDimitry Andric //  the task, just check to make sure it matches the ending task passed in.
2120b57cec5SDimitry Andric //
2130b57cec5SDimitry Andric //  gtid: global thread identifier for the calling thread
2140b57cec5SDimitry Andric //  thread: thread info structure containing stack
2150b57cec5SDimitry Andric //  tied_task: the task popped off the stack
2160b57cec5SDimitry Andric //  ending_task: the task that is ending (should match popped task)
2170b57cec5SDimitry Andric static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
2180b57cec5SDimitry Andric                                  kmp_taskdata_t *ending_task) {
2190b57cec5SDimitry Andric   // GEH - need to consider what to do if tt_threads_data not allocated yet
2200b57cec5SDimitry Andric   kmp_thread_data_t *thread_data =
2210b57cec5SDimitry Andric       &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)];
2220b57cec5SDimitry Andric   kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
2230b57cec5SDimitry Andric   kmp_taskdata_t *tied_task;
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
2260b57cec5SDimitry Andric     // Don't pop anything from stack if team or team tasks are serialized
2270b57cec5SDimitry Andric     return;
2280b57cec5SDimitry Andric   }
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
2310b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
2340b57cec5SDimitry Andric                 thread));
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric   // fix up ts_top if we need to pop from previous block
2370b57cec5SDimitry Andric   if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
2380b57cec5SDimitry Andric     kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric     stack_block = stack_block->sb_prev;
2410b57cec5SDimitry Andric     task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
2420b57cec5SDimitry Andric   }
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   // finish bookkeeping
2450b57cec5SDimitry Andric   task_stack->ts_top--;
2460b57cec5SDimitry Andric   task_stack->ts_entries--;
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric   tied_task = *(task_stack->ts_top);
2490b57cec5SDimitry Andric 
2500b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tied_task != NULL);
2510b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
2520b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
2550b57cec5SDimitry Andric                 tied_task));
2560b57cec5SDimitry Andric   return;
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric #endif /* BUILD_TIED_TASK_STACK */
2590b57cec5SDimitry Andric 
// __kmp_task_is_allowed: returns 1 if new task is allowed to execute,
// 0 otherwise.
// Checks the Task Scheduling Constraint (if requested) and, on success,
// acquires all of the new task's mutexinoutset locks (if any); the caller
// owns those locks once this returns true.
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC)
    // only descendant of all deferred tied tasks can be scheduled, checking
    // the last one is enough, as it in turn is the descendant of all others
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      // Climb tasknew's ancestry until we either meet 'current' (so tasknew
      // is its descendant) or rise above current's nesting level.
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false; // TSC violated: tasknew is not a descendant of current
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
#if OMPX_TASKGRAPH
  if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#else
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#endif
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previous locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric // __kmp_realloc_task_deque:
3090b57cec5SDimitry Andric // Re-allocates a task deque for a particular thread, copies the content from
3100b57cec5SDimitry Andric // the old deque and adjusts the necessary data structures relating to the
3110b57cec5SDimitry Andric // deque. This operation must be done with the deque_lock being held
3120b57cec5SDimitry Andric static void __kmp_realloc_task_deque(kmp_info_t *thread,
3130b57cec5SDimitry Andric                                      kmp_thread_data_t *thread_data) {
3140b57cec5SDimitry Andric   kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
3155ffd83dbSDimitry Andric   KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
3160b57cec5SDimitry Andric   kmp_int32 new_size = 2 * size;
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
3190b57cec5SDimitry Andric                 "%d] for thread_data %p\n",
3200b57cec5SDimitry Andric                 __kmp_gtid_from_thread(thread), size, new_size, thread_data));
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric   kmp_taskdata_t **new_deque =
3230b57cec5SDimitry Andric       (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric   int i, j;
3260b57cec5SDimitry Andric   for (i = thread_data->td.td_deque_head, j = 0; j < size;
3270b57cec5SDimitry Andric        i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
3280b57cec5SDimitry Andric     new_deque[j] = thread_data->td.td_deque[i];
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric   __kmp_free(thread_data->td.td_deque);
3310b57cec5SDimitry Andric 
3320b57cec5SDimitry Andric   thread_data->td.td_deque_head = 0;
3330b57cec5SDimitry Andric   thread_data->td.td_deque_tail = size;
3340b57cec5SDimitry Andric   thread_data->td.td_deque = new_deque;
3350b57cec5SDimitry Andric   thread_data->td.td_deque_size = new_size;
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric 
33881ad6265SDimitry Andric static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
33981ad6265SDimitry Andric   kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
34081ad6265SDimitry Andric   kmp_thread_data_t *thread_data = &l->td;
34181ad6265SDimitry Andric   __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
34281ad6265SDimitry Andric   thread_data->td.td_deque_last_stolen = -1;
34381ad6265SDimitry Andric   KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
34481ad6265SDimitry Andric                 "for thread_data %p\n",
34581ad6265SDimitry Andric                 __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
34681ad6265SDimitry Andric   thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
34781ad6265SDimitry Andric       INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
34881ad6265SDimitry Andric   thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
34981ad6265SDimitry Andric   return l;
35081ad6265SDimitry Andric }
35181ad6265SDimitry Andric 
35281ad6265SDimitry Andric // The function finds the deque of priority tasks with given priority, or
35381ad6265SDimitry Andric // allocates a new deque and put it into sorted (high -> low) list of deques.
35481ad6265SDimitry Andric // Deques of non-default priority tasks are shared between all threads in team,
35581ad6265SDimitry Andric // as opposed to per-thread deques of tasks with default priority.
35681ad6265SDimitry Andric // The function is called under the lock task_team->tt.tt_task_pri_lock.
35781ad6265SDimitry Andric static kmp_thread_data_t *
35881ad6265SDimitry Andric __kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
35981ad6265SDimitry Andric   kmp_thread_data_t *thread_data;
36081ad6265SDimitry Andric   kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
36181ad6265SDimitry Andric   if (lst->priority == pri) {
36281ad6265SDimitry Andric     // Found queue of tasks with given priority.
36381ad6265SDimitry Andric     thread_data = &lst->td;
36481ad6265SDimitry Andric   } else if (lst->priority < pri) {
36581ad6265SDimitry Andric     // All current priority queues contain tasks with lower priority.
36681ad6265SDimitry Andric     // Allocate new one for given priority tasks.
36781ad6265SDimitry Andric     kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
36881ad6265SDimitry Andric     thread_data = &list->td;
36981ad6265SDimitry Andric     list->priority = pri;
37081ad6265SDimitry Andric     list->next = lst;
37181ad6265SDimitry Andric     task_team->tt.tt_task_pri_list = list;
37281ad6265SDimitry Andric   } else { // task_team->tt.tt_task_pri_list->priority > pri
37381ad6265SDimitry Andric     kmp_task_pri_t *next_queue = lst->next;
37481ad6265SDimitry Andric     while (next_queue && next_queue->priority > pri) {
37581ad6265SDimitry Andric       lst = next_queue;
37681ad6265SDimitry Andric       next_queue = lst->next;
37781ad6265SDimitry Andric     }
37881ad6265SDimitry Andric     // lst->priority > pri && (next == NULL || pri >= next->priority)
37981ad6265SDimitry Andric     if (next_queue == NULL) {
38081ad6265SDimitry Andric       // No queue with pri priority, need to allocate new one.
38181ad6265SDimitry Andric       kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
38281ad6265SDimitry Andric       thread_data = &list->td;
38381ad6265SDimitry Andric       list->priority = pri;
38481ad6265SDimitry Andric       list->next = NULL;
38581ad6265SDimitry Andric       lst->next = list;
38681ad6265SDimitry Andric     } else if (next_queue->priority == pri) {
38781ad6265SDimitry Andric       // Found queue of tasks with given priority.
38881ad6265SDimitry Andric       thread_data = &next_queue->td;
38981ad6265SDimitry Andric     } else { // lst->priority > pri > next->priority
39081ad6265SDimitry Andric       // insert newly allocated between existed queues
39181ad6265SDimitry Andric       kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
39281ad6265SDimitry Andric       thread_data = &list->td;
39381ad6265SDimitry Andric       list->priority = pri;
39481ad6265SDimitry Andric       list->next = next_queue;
39581ad6265SDimitry Andric       lst->next = list;
39681ad6265SDimitry Andric     }
39781ad6265SDimitry Andric   }
39881ad6265SDimitry Andric   return thread_data;
39981ad6265SDimitry Andric }
40081ad6265SDimitry Andric 
//  __kmp_push_priority_task: Add a task to the team's priority task deque.
//  Priority deques are shared team-wide and kept in a list sorted by priority
//  (high -> low), guarded by tt_task_pri_lock; each deque has its own
//  td_deque_lock. Returns TASK_SUCCESSFULLY_PUSHED or TASK_NOT_PUSHED (the
//  latter means the caller should execute the task itself).
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find task queue specific to priority value
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    // Re-check under the lock: another thread may have created the list
    // between the unlocked test above and lock acquisition.
    if (task_team->tt.tt_task_pri_list == NULL) {
      // List of queues is still empty, allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Other thread initialized a queue. Check if it fits and get thread_data.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else {
    if (lst->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &lst->td;
    } else {
      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    }
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      // Throttling: rather than growing the deque, refuse the push so the
      // caller runs the task immediately (it is allowed to execute).
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++; // atomic inc
  return TASK_SUCCESSFULLY_PUSHED;
}
47581ad6265SDimitry Andric 
4760b57cec5SDimitry Andric //  __kmp_push_task: Add a task to the thread's deque
4770b57cec5SDimitry Andric static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
4780b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
4790b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
480e8d8bef9SDimitry Andric 
481349cc55cSDimitry Andric   // If we encounter a hidden helper task, and the current thread is not a
482349cc55cSDimitry Andric   // hidden helper thread, we have to give the task to any hidden helper thread
483349cc55cSDimitry Andric   // starting from its shadow one.
484349cc55cSDimitry Andric   if (UNLIKELY(taskdata->td_flags.hidden_helper &&
485349cc55cSDimitry Andric                !KMP_HIDDEN_HELPER_THREAD(gtid))) {
486349cc55cSDimitry Andric     kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
487349cc55cSDimitry Andric     __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
488349cc55cSDimitry Andric     // Signal the hidden helper threads.
489349cc55cSDimitry Andric     __kmp_hidden_helper_worker_thread_signal();
490349cc55cSDimitry Andric     return TASK_SUCCESSFULLY_PUSHED;
491e8d8bef9SDimitry Andric   }
492e8d8bef9SDimitry Andric 
4930b57cec5SDimitry Andric   kmp_task_team_t *task_team = thread->th.th_task_team;
4940b57cec5SDimitry Andric   kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4950b57cec5SDimitry Andric   kmp_thread_data_t *thread_data;
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric   KA_TRACE(20,
4980b57cec5SDimitry Andric            ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));
4990b57cec5SDimitry Andric 
500e8d8bef9SDimitry Andric   if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
5010b57cec5SDimitry Andric     // untied task needs to increment counter so that the task structure is not
5020b57cec5SDimitry Andric     // freed prematurely
5030b57cec5SDimitry Andric     kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
5040b57cec5SDimitry Andric     KMP_DEBUG_USE_VAR(counter);
5050b57cec5SDimitry Andric     KA_TRACE(
5060b57cec5SDimitry Andric         20,
5070b57cec5SDimitry Andric         ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
5080b57cec5SDimitry Andric          gtid, counter, taskdata));
5090b57cec5SDimitry Andric   }
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric   // The first check avoids building task_team thread data if serialized
512e8d8bef9SDimitry Andric   if (UNLIKELY(taskdata->td_flags.task_serial)) {
5130b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
5140b57cec5SDimitry Andric                   "TASK_NOT_PUSHED for task %p\n",
5150b57cec5SDimitry Andric                   gtid, taskdata));
5160b57cec5SDimitry Andric     return TASK_NOT_PUSHED;
5170b57cec5SDimitry Andric   }
5180b57cec5SDimitry Andric 
5190b57cec5SDimitry Andric   // Now that serialized tasks have returned, we can assume that we are not in
5200b57cec5SDimitry Andric   // immediate exec mode
5210b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
522e8d8bef9SDimitry Andric   if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
5230b57cec5SDimitry Andric     __kmp_enable_tasking(task_team, thread);
5240b57cec5SDimitry Andric   }
5250b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
5260b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
5270b57cec5SDimitry Andric 
52881ad6265SDimitry Andric   if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
52981ad6265SDimitry Andric       __kmp_max_task_priority > 0) {
53081ad6265SDimitry Andric     int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
53181ad6265SDimitry Andric     return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
53281ad6265SDimitry Andric   }
53381ad6265SDimitry Andric 
5340b57cec5SDimitry Andric   // Find tasking deque specific to encountering thread
5350b57cec5SDimitry Andric   thread_data = &task_team->tt.tt_threads_data[tid];
5360b57cec5SDimitry Andric 
537e8d8bef9SDimitry Andric   // No lock needed since only owner can allocate. If the task is hidden_helper,
538e8d8bef9SDimitry Andric   // we don't need it either because we have initialized the dequeue for hidden
539e8d8bef9SDimitry Andric   // helper thread data.
540e8d8bef9SDimitry Andric   if (UNLIKELY(thread_data->td.td_deque == NULL)) {
5410b57cec5SDimitry Andric     __kmp_alloc_task_deque(thread, thread_data);
5420b57cec5SDimitry Andric   }
5430b57cec5SDimitry Andric 
5440b57cec5SDimitry Andric   int locked = 0;
5450b57cec5SDimitry Andric   // Check if deque is full
5460b57cec5SDimitry Andric   if (TCR_4(thread_data->td.td_deque_ntasks) >=
5470b57cec5SDimitry Andric       TASK_DEQUE_SIZE(thread_data->td)) {
5480b57cec5SDimitry Andric     if (__kmp_enable_task_throttling &&
5490b57cec5SDimitry Andric         __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
5500b57cec5SDimitry Andric                               thread->th.th_current_task)) {
5510b57cec5SDimitry Andric       KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
5520b57cec5SDimitry Andric                     "TASK_NOT_PUSHED for task %p\n",
5530b57cec5SDimitry Andric                     gtid, taskdata));
5540b57cec5SDimitry Andric       return TASK_NOT_PUSHED;
5550b57cec5SDimitry Andric     } else {
5560b57cec5SDimitry Andric       __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
5570b57cec5SDimitry Andric       locked = 1;
5585ffd83dbSDimitry Andric       if (TCR_4(thread_data->td.td_deque_ntasks) >=
5595ffd83dbSDimitry Andric           TASK_DEQUE_SIZE(thread_data->td)) {
5600b57cec5SDimitry Andric         // expand deque to push the task which is not allowed to execute
5610b57cec5SDimitry Andric         __kmp_realloc_task_deque(thread, thread_data);
5620b57cec5SDimitry Andric       }
5630b57cec5SDimitry Andric     }
5645ffd83dbSDimitry Andric   }
5650b57cec5SDimitry Andric   // Lock the deque for the task push operation
5660b57cec5SDimitry Andric   if (!locked) {
5670b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
5680b57cec5SDimitry Andric     // Need to recheck as we can get a proxy task from thread outside of OpenMP
5690b57cec5SDimitry Andric     if (TCR_4(thread_data->td.td_deque_ntasks) >=
5700b57cec5SDimitry Andric         TASK_DEQUE_SIZE(thread_data->td)) {
5710b57cec5SDimitry Andric       if (__kmp_enable_task_throttling &&
5720b57cec5SDimitry Andric           __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
5730b57cec5SDimitry Andric                                 thread->th.th_current_task)) {
5740b57cec5SDimitry Andric         __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
5750b57cec5SDimitry Andric         KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
5760b57cec5SDimitry Andric                       "returning TASK_NOT_PUSHED for task %p\n",
5770b57cec5SDimitry Andric                       gtid, taskdata));
5780b57cec5SDimitry Andric         return TASK_NOT_PUSHED;
5790b57cec5SDimitry Andric       } else {
5800b57cec5SDimitry Andric         // expand deque to push the task which is not allowed to execute
5810b57cec5SDimitry Andric         __kmp_realloc_task_deque(thread, thread_data);
5820b57cec5SDimitry Andric       }
5830b57cec5SDimitry Andric     }
5840b57cec5SDimitry Andric   }
5850b57cec5SDimitry Andric   // Must have room since no thread can add tasks but calling thread
5860b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
5870b57cec5SDimitry Andric                    TASK_DEQUE_SIZE(thread_data->td));
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   thread_data->td.td_deque[thread_data->td.td_deque_tail] =
5900b57cec5SDimitry Andric       taskdata; // Push taskdata
5910b57cec5SDimitry Andric   // Wrap index.
5920b57cec5SDimitry Andric   thread_data->td.td_deque_tail =
5930b57cec5SDimitry Andric       (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
5940b57cec5SDimitry Andric   TCW_4(thread_data->td.td_deque_ntasks,
5950b57cec5SDimitry Andric         TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
596e8d8bef9SDimitry Andric   KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
597e8d8bef9SDimitry Andric   KMP_FSYNC_RELEASING(taskdata); // releasing child
5980b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
5990b57cec5SDimitry Andric                 "task=%p ntasks=%d head=%u tail=%u\n",
6000b57cec5SDimitry Andric                 gtid, taskdata, thread_data->td.td_deque_ntasks,
6010b57cec5SDimitry Andric                 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric   __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
6040b57cec5SDimitry Andric 
6050b57cec5SDimitry Andric   return TASK_SUCCESSFULLY_PUSHED;
6060b57cec5SDimitry Andric }
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric // __kmp_pop_current_task_from_thread: set up current task from called thread
6090b57cec5SDimitry Andric // when team ends
6100b57cec5SDimitry Andric //
6110b57cec5SDimitry Andric // this_thr: thread structure to set current_task in.
6120b57cec5SDimitry Andric void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
6130b57cec5SDimitry Andric   KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
6140b57cec5SDimitry Andric                 "this_thread=%p, curtask=%p, "
6150b57cec5SDimitry Andric                 "curtask_parent=%p\n",
6160b57cec5SDimitry Andric                 0, this_thr, this_thr->th.th_current_task,
6170b57cec5SDimitry Andric                 this_thr->th.th_current_task->td_parent));
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric   this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
6200b57cec5SDimitry Andric 
6210b57cec5SDimitry Andric   KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
6220b57cec5SDimitry Andric                 "this_thread=%p, curtask=%p, "
6230b57cec5SDimitry Andric                 "curtask_parent=%p\n",
6240b57cec5SDimitry Andric                 0, this_thr, this_thr->th.th_current_task,
6250b57cec5SDimitry Andric                 this_thr->th.th_current_task->td_parent));
6260b57cec5SDimitry Andric }
6270b57cec5SDimitry Andric 
6280b57cec5SDimitry Andric // __kmp_push_current_task_to_thread: set up current task in called thread for a
6290b57cec5SDimitry Andric // new team
6300b57cec5SDimitry Andric //
6310b57cec5SDimitry Andric // this_thr: thread structure to set up
6320b57cec5SDimitry Andric // team: team for implicit task data
6330b57cec5SDimitry Andric // tid: thread within team to set up
6340b57cec5SDimitry Andric void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
6350b57cec5SDimitry Andric                                        int tid) {
6360b57cec5SDimitry Andric   // current task of the thread is a parent of the new just created implicit
6370b57cec5SDimitry Andric   // tasks of new team
6380b57cec5SDimitry Andric   KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
6390b57cec5SDimitry Andric                 "curtask=%p "
6400b57cec5SDimitry Andric                 "parent_task=%p\n",
6410b57cec5SDimitry Andric                 tid, this_thr, this_thr->th.th_current_task,
6420b57cec5SDimitry Andric                 team->t.t_implicit_task_taskdata[tid].td_parent));
6430b57cec5SDimitry Andric 
6440b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr != NULL);
6450b57cec5SDimitry Andric 
6460b57cec5SDimitry Andric   if (tid == 0) {
6470b57cec5SDimitry Andric     if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
6480b57cec5SDimitry Andric       team->t.t_implicit_task_taskdata[0].td_parent =
6490b57cec5SDimitry Andric           this_thr->th.th_current_task;
6500b57cec5SDimitry Andric       this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
6510b57cec5SDimitry Andric     }
6520b57cec5SDimitry Andric   } else {
6530b57cec5SDimitry Andric     team->t.t_implicit_task_taskdata[tid].td_parent =
6540b57cec5SDimitry Andric         team->t.t_implicit_task_taskdata[0].td_parent;
6550b57cec5SDimitry Andric     this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
6560b57cec5SDimitry Andric   }
6570b57cec5SDimitry Andric 
6580b57cec5SDimitry Andric   KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
6590b57cec5SDimitry Andric                 "curtask=%p "
6600b57cec5SDimitry Andric                 "parent_task=%p\n",
6610b57cec5SDimitry Andric                 tid, this_thr, this_thr->th.th_current_task,
6620b57cec5SDimitry Andric                 team->t.t_implicit_task_taskdata[tid].td_parent));
6630b57cec5SDimitry Andric }
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric // __kmp_task_start: bookkeeping for a task starting execution
6660b57cec5SDimitry Andric //
6670b57cec5SDimitry Andric // GTID: global thread id of calling thread
6680b57cec5SDimitry Andric // task: task starting execution
6690b57cec5SDimitry Andric // current_task: task suspending
6700b57cec5SDimitry Andric static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
6710b57cec5SDimitry Andric                              kmp_taskdata_t *current_task) {
6720b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
6730b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
6740b57cec5SDimitry Andric 
6750b57cec5SDimitry Andric   KA_TRACE(10,
6760b57cec5SDimitry Andric            ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
6770b57cec5SDimitry Andric             gtid, taskdata, current_task));
6780b57cec5SDimitry Andric 
6790b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
6800b57cec5SDimitry Andric 
6810b57cec5SDimitry Andric   // mark currently executing task as suspended
6820b57cec5SDimitry Andric   // TODO: GEH - make sure root team implicit task is initialized properly.
6830b57cec5SDimitry Andric   // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
6840b57cec5SDimitry Andric   current_task->td_flags.executing = 0;
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric // Add task to stack if tied
6870b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
6880b57cec5SDimitry Andric   if (taskdata->td_flags.tiedness == TASK_TIED) {
6890b57cec5SDimitry Andric     __kmp_push_task_stack(gtid, thread, taskdata);
6900b57cec5SDimitry Andric   }
6910b57cec5SDimitry Andric #endif /* BUILD_TIED_TASK_STACK */
6920b57cec5SDimitry Andric 
6930b57cec5SDimitry Andric   // mark starting task as executing and as current task
6940b57cec5SDimitry Andric   thread->th.th_current_task = taskdata;
6950b57cec5SDimitry Andric 
6960b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
6970b57cec5SDimitry Andric                    taskdata->td_flags.tiedness == TASK_UNTIED);
6980b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
6990b57cec5SDimitry Andric                    taskdata->td_flags.tiedness == TASK_UNTIED);
7000b57cec5SDimitry Andric   taskdata->td_flags.started = 1;
7010b57cec5SDimitry Andric   taskdata->td_flags.executing = 1;
7020b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
7030b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
7040b57cec5SDimitry Andric 
7050b57cec5SDimitry Andric   // GEH TODO: shouldn't we pass some sort of location identifier here?
7060b57cec5SDimitry Andric   // APT: yes, we will pass location here.
7070b57cec5SDimitry Andric   // need to store current thread state (in a thread or taskdata structure)
7080b57cec5SDimitry Andric   // before setting work_state, otherwise wrong state is set after end of task
7090b57cec5SDimitry Andric 
7100b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric   return;
7130b57cec5SDimitry Andric }
7140b57cec5SDimitry Andric 
7150b57cec5SDimitry Andric #if OMPT_SUPPORT
7160b57cec5SDimitry Andric //------------------------------------------------------------------------------
7170b57cec5SDimitry Andric // __ompt_task_init:
7180b57cec5SDimitry Andric //   Initialize OMPT fields maintained by a task. This will only be called after
7190b57cec5SDimitry Andric //   ompt_start_tool, so we already know whether ompt is enabled or not.
7200b57cec5SDimitry Andric 
7210b57cec5SDimitry Andric static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
7220b57cec5SDimitry Andric   // The calls to __ompt_task_init already have the ompt_enabled condition.
7230b57cec5SDimitry Andric   task->ompt_task_info.task_data.value = 0;
7240b57cec5SDimitry Andric   task->ompt_task_info.frame.exit_frame = ompt_data_none;
7250b57cec5SDimitry Andric   task->ompt_task_info.frame.enter_frame = ompt_data_none;
726fe6060f1SDimitry Andric   task->ompt_task_info.frame.exit_frame_flags =
727fe6060f1SDimitry Andric       ompt_frame_runtime | ompt_frame_framepointer;
728fe6060f1SDimitry Andric   task->ompt_task_info.frame.enter_frame_flags =
729fe6060f1SDimitry Andric       ompt_frame_runtime | ompt_frame_framepointer;
73081ad6265SDimitry Andric   task->ompt_task_info.dispatch_chunk.start = 0;
73181ad6265SDimitry Andric   task->ompt_task_info.dispatch_chunk.iterations = 0;
7320b57cec5SDimitry Andric }
7330b57cec5SDimitry Andric 
7340b57cec5SDimitry Andric // __ompt_task_start:
7350b57cec5SDimitry Andric //   Build and trigger task-begin event
7360b57cec5SDimitry Andric static inline void __ompt_task_start(kmp_task_t *task,
7370b57cec5SDimitry Andric                                      kmp_taskdata_t *current_task,
7380b57cec5SDimitry Andric                                      kmp_int32 gtid) {
7390b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
7400b57cec5SDimitry Andric   ompt_task_status_t status = ompt_task_switch;
7410b57cec5SDimitry Andric   if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
7420b57cec5SDimitry Andric     status = ompt_task_yield;
7430b57cec5SDimitry Andric     __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
7440b57cec5SDimitry Andric   }
7450b57cec5SDimitry Andric   /* let OMPT know that we're about to run this task */
7460b57cec5SDimitry Andric   if (ompt_enabled.ompt_callback_task_schedule) {
7470b57cec5SDimitry Andric     ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
7480b57cec5SDimitry Andric         &(current_task->ompt_task_info.task_data), status,
7490b57cec5SDimitry Andric         &(taskdata->ompt_task_info.task_data));
7500b57cec5SDimitry Andric   }
7510b57cec5SDimitry Andric   taskdata->ompt_task_info.scheduling_parent = current_task;
7520b57cec5SDimitry Andric }
7530b57cec5SDimitry Andric 
7540b57cec5SDimitry Andric // __ompt_task_finish:
7550b57cec5SDimitry Andric //   Build and trigger final task-schedule event
7565ffd83dbSDimitry Andric static inline void __ompt_task_finish(kmp_task_t *task,
7575ffd83dbSDimitry Andric                                       kmp_taskdata_t *resumed_task,
7585ffd83dbSDimitry Andric                                       ompt_task_status_t status) {
7595ffd83dbSDimitry Andric   if (ompt_enabled.ompt_callback_task_schedule) {
7600b57cec5SDimitry Andric     kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
7610b57cec5SDimitry Andric     if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
7620b57cec5SDimitry Andric         taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
7630b57cec5SDimitry Andric       status = ompt_task_cancel;
7640b57cec5SDimitry Andric     }
7650b57cec5SDimitry Andric 
7660b57cec5SDimitry Andric     /* let OMPT know that we're returning to the callee task */
7670b57cec5SDimitry Andric     ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
7680b57cec5SDimitry Andric         &(taskdata->ompt_task_info.task_data), status,
7695ffd83dbSDimitry Andric         (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
7700b57cec5SDimitry Andric   }
7710b57cec5SDimitry Andric }
7720b57cec5SDimitry Andric #endif
7730b57cec5SDimitry Andric 
7740b57cec5SDimitry Andric template <bool ompt>
7750b57cec5SDimitry Andric static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
7760b57cec5SDimitry Andric                                                kmp_task_t *task,
7770b57cec5SDimitry Andric                                                void *frame_address,
7780b57cec5SDimitry Andric                                                void *return_address) {
7790b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
7800b57cec5SDimitry Andric   kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
7810b57cec5SDimitry Andric 
7820b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
7830b57cec5SDimitry Andric                 "current_task=%p\n",
7840b57cec5SDimitry Andric                 gtid, loc_ref, taskdata, current_task));
7850b57cec5SDimitry Andric 
786fe6060f1SDimitry Andric   if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
7870b57cec5SDimitry Andric     // untied task needs to increment counter so that the task structure is not
7880b57cec5SDimitry Andric     // freed prematurely
7890b57cec5SDimitry Andric     kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
7900b57cec5SDimitry Andric     KMP_DEBUG_USE_VAR(counter);
7910b57cec5SDimitry Andric     KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
7920b57cec5SDimitry Andric                   "incremented for task %p\n",
7930b57cec5SDimitry Andric                   gtid, counter, taskdata));
7940b57cec5SDimitry Andric   }
7950b57cec5SDimitry Andric 
7960b57cec5SDimitry Andric   taskdata->td_flags.task_serial =
7970b57cec5SDimitry Andric       1; // Execute this task immediately, not deferred.
7980b57cec5SDimitry Andric   __kmp_task_start(gtid, task, current_task);
7990b57cec5SDimitry Andric 
8000b57cec5SDimitry Andric #if OMPT_SUPPORT
8010b57cec5SDimitry Andric   if (ompt) {
8020b57cec5SDimitry Andric     if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
8030b57cec5SDimitry Andric       current_task->ompt_task_info.frame.enter_frame.ptr =
8040b57cec5SDimitry Andric           taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
8050b57cec5SDimitry Andric       current_task->ompt_task_info.frame.enter_frame_flags =
806fe6060f1SDimitry Andric           taskdata->ompt_task_info.frame.exit_frame_flags =
807fe6060f1SDimitry Andric               ompt_frame_application | ompt_frame_framepointer;
8080b57cec5SDimitry Andric     }
8090b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_task_create) {
8100b57cec5SDimitry Andric       ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
8110b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
8120b57cec5SDimitry Andric           &(parent_info->task_data), &(parent_info->frame),
8130b57cec5SDimitry Andric           &(taskdata->ompt_task_info.task_data),
814*0fca6ea1SDimitry Andric           TASK_TYPE_DETAILS_FORMAT(taskdata), 0, return_address);
8150b57cec5SDimitry Andric     }
8160b57cec5SDimitry Andric     __ompt_task_start(task, current_task, gtid);
8170b57cec5SDimitry Andric   }
8180b57cec5SDimitry Andric #endif // OMPT_SUPPORT
8190b57cec5SDimitry Andric 
8200b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
8210b57cec5SDimitry Andric                 loc_ref, taskdata));
8220b57cec5SDimitry Andric }
8230b57cec5SDimitry Andric 
8240b57cec5SDimitry Andric #if OMPT_SUPPORT
8250b57cec5SDimitry Andric OMPT_NOINLINE
8260b57cec5SDimitry Andric static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
8270b57cec5SDimitry Andric                                            kmp_task_t *task,
8280b57cec5SDimitry Andric                                            void *frame_address,
8290b57cec5SDimitry Andric                                            void *return_address) {
8300b57cec5SDimitry Andric   __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
8310b57cec5SDimitry Andric                                            return_address);
8320b57cec5SDimitry Andric }
8330b57cec5SDimitry Andric #endif // OMPT_SUPPORT
8340b57cec5SDimitry Andric 
8350b57cec5SDimitry Andric // __kmpc_omp_task_begin_if0: report that a given serialized task has started
8360b57cec5SDimitry Andric // execution
8370b57cec5SDimitry Andric //
8380b57cec5SDimitry Andric // loc_ref: source location information; points to beginning of task block.
8390b57cec5SDimitry Andric // gtid: global thread number.
8400b57cec5SDimitry Andric // task: task thunk for the started task.
8415f757f3fSDimitry Andric #ifdef __s390x__
8425f757f3fSDimitry Andric // This is required for OMPT_GET_FRAME_ADDRESS(1) to compile on s390x.
8435f757f3fSDimitry Andric // In order for it to work correctly, the caller also needs to be compiled with
8445f757f3fSDimitry Andric // backchain. If a caller is compiled without backchain,
8455f757f3fSDimitry Andric // OMPT_GET_FRAME_ADDRESS(1) will produce an incorrect value, but will not
8465f757f3fSDimitry Andric // crash.
8475f757f3fSDimitry Andric __attribute__((target("backchain")))
8485f757f3fSDimitry Andric #endif
8490b57cec5SDimitry Andric void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
8500b57cec5SDimitry Andric                                kmp_task_t *task) {
8510b57cec5SDimitry Andric #if OMPT_SUPPORT
8520b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled)) {
8530b57cec5SDimitry Andric     OMPT_STORE_RETURN_ADDRESS(gtid);
8540b57cec5SDimitry Andric     __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
8550b57cec5SDimitry Andric                                    OMPT_GET_FRAME_ADDRESS(1),
8560b57cec5SDimitry Andric                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
8570b57cec5SDimitry Andric     return;
8580b57cec5SDimitry Andric   }
8590b57cec5SDimitry Andric #endif
8600b57cec5SDimitry Andric   __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric 
8630b57cec5SDimitry Andric #ifdef TASK_UNUSED
8640b57cec5SDimitry Andric // __kmpc_omp_task_begin: report that a given task has started execution
8650b57cec5SDimitry Andric // NEVER GENERATED BY COMPILER, DEPRECATED!!!
8660b57cec5SDimitry Andric void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
8670b57cec5SDimitry Andric   kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
8680b57cec5SDimitry Andric 
8690b57cec5SDimitry Andric   KA_TRACE(
8700b57cec5SDimitry Andric       10,
8710b57cec5SDimitry Andric       ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
8720b57cec5SDimitry Andric        gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
8730b57cec5SDimitry Andric 
8740b57cec5SDimitry Andric   __kmp_task_start(gtid, task, current_task);
8750b57cec5SDimitry Andric 
8760b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
8770b57cec5SDimitry Andric                 loc_ref, KMP_TASK_TO_TASKDATA(task)));
8780b57cec5SDimitry Andric   return;
8790b57cec5SDimitry Andric }
8800b57cec5SDimitry Andric #endif // TASK_UNUSED
8810b57cec5SDimitry Andric 
8820b57cec5SDimitry Andric // __kmp_free_task: free the current task space and the space for shareds
8830b57cec5SDimitry Andric //
8840b57cec5SDimitry Andric // gtid: Global thread ID of calling thread
8850b57cec5SDimitry Andric // taskdata: task to free
8860b57cec5SDimitry Andric // thread: thread data structure of caller
8870b57cec5SDimitry Andric static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
8880b57cec5SDimitry Andric                             kmp_info_t *thread) {
8890b57cec5SDimitry Andric   KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
8900b57cec5SDimitry Andric                 taskdata));
8910b57cec5SDimitry Andric 
8920b57cec5SDimitry Andric   // Check to make sure all flags and counters have the correct values
8930b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
8940b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
8950b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
8960b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
8970b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
8980b57cec5SDimitry Andric                    taskdata->td_flags.task_serial == 1);
8990b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
90081ad6265SDimitry Andric   kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
90181ad6265SDimitry Andric   // Clear data to not be re-used later by mistake.
90281ad6265SDimitry Andric   task->data1.destructors = NULL;
90381ad6265SDimitry Andric   task->data2.priority = 0;
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric   taskdata->td_flags.freed = 1;
90606c3fb27SDimitry Andric #if OMPX_TASKGRAPH
90706c3fb27SDimitry Andric   // do not free tasks in taskgraph
90806c3fb27SDimitry Andric   if (!taskdata->is_taskgraph) {
90906c3fb27SDimitry Andric #endif
9100b57cec5SDimitry Andric // deallocate the taskdata and shared variable blocks associated with this task
9110b57cec5SDimitry Andric #if USE_FAST_MEMORY
9120b57cec5SDimitry Andric   __kmp_fast_free(thread, taskdata);
9130b57cec5SDimitry Andric #else /* ! USE_FAST_MEMORY */
9140b57cec5SDimitry Andric   __kmp_thread_free(thread, taskdata);
9150b57cec5SDimitry Andric #endif
91606c3fb27SDimitry Andric #if OMPX_TASKGRAPH
91706c3fb27SDimitry Andric   } else {
91806c3fb27SDimitry Andric     taskdata->td_flags.complete = 0;
91906c3fb27SDimitry Andric     taskdata->td_flags.started = 0;
92006c3fb27SDimitry Andric     taskdata->td_flags.freed = 0;
92106c3fb27SDimitry Andric     taskdata->td_flags.executing = 0;
92206c3fb27SDimitry Andric     taskdata->td_flags.task_serial =
92306c3fb27SDimitry Andric         (taskdata->td_parent->td_flags.final ||
92406c3fb27SDimitry Andric           taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);
92506c3fb27SDimitry Andric 
92606c3fb27SDimitry Andric     // taskdata->td_allow_completion_event.pending_events_count = 1;
92706c3fb27SDimitry Andric     KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
92806c3fb27SDimitry Andric     KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
92906c3fb27SDimitry Andric     // start at one because counts current task and children
93006c3fb27SDimitry Andric     KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
93106c3fb27SDimitry Andric   }
93206c3fb27SDimitry Andric #endif
93306c3fb27SDimitry Andric 
9340b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
9350b57cec5SDimitry Andric }
9360b57cec5SDimitry Andric 
// __kmp_free_task_and_ancestors: free the current task and ancestors without
// children
//
// Decrements the allocated-children refcount of taskdata; when it hits zero
// the task is freed and the walk continues with its parent, stopping at the
// first ancestor that still has allocated children, at the implicit task, or
// immediately after the first free in the serialized case.
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // Predecrement simulated by "- 1": children is the count AFTER removing
  // this task's own reference.
  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    // In the serialized (non-proxy) case only the task itself is freed; do
    // not walk further up.
    if (team_serial)
      return;
    // Stop checking ancestors at implicit task instead of walking up ancestor
    // tree to avoid premature deallocation of ancestors.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to cleanup dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          // CAS on the whole 32-bit flags word: only the one thread that
          // successfully flips `complete` 1->0 frees the dephash entries,
          // so concurrent finishers cannot double-free them.
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            // cleanup dephash of finished implicit task
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by "- 1" calculation
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
10040b57cec5SDimitry Andric 
1005349cc55cSDimitry Andric // Only need to keep track of child task counts if any of the following:
1006349cc55cSDimitry Andric // 1. team parallel and tasking not serialized;
1007349cc55cSDimitry Andric // 2. it is a proxy or detachable or hidden helper task
1008349cc55cSDimitry Andric // 3. the children counter of its parent task is greater than 0.
1009349cc55cSDimitry Andric // The reason for the 3rd one is for serialized team that found detached task,
1010349cc55cSDimitry Andric // hidden helper task, T. In this case, the execution of T is still deferred,
1011349cc55cSDimitry Andric // and it is also possible that a regular task depends on T. In this case, if we
1012349cc55cSDimitry Andric // don't track the children, task synchronization will be broken.
1013349cc55cSDimitry Andric static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
1014349cc55cSDimitry Andric   kmp_tasking_flags_t flags = taskdata->td_flags;
1015349cc55cSDimitry Andric   bool ret = !(flags.team_serial || flags.tasking_ser);
1016349cc55cSDimitry Andric   ret = ret || flags.proxy == TASK_PROXY ||
1017349cc55cSDimitry Andric         flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
1018349cc55cSDimitry Andric   ret = ret ||
1019349cc55cSDimitry Andric         KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
102006c3fb27SDimitry Andric #if OMPX_TASKGRAPH
102106c3fb27SDimitry Andric   if (taskdata->td_taskgroup && taskdata->is_taskgraph)
102206c3fb27SDimitry Andric     ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
102306c3fb27SDimitry Andric #endif
1024349cc55cSDimitry Andric   return ret;
1025349cc55cSDimitry Andric }
1026349cc55cSDimitry Andric 
// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)
//
// template<ompt>: effectively ompt_enabled.enabled!=0
// the version with ompt=false is inlined, allowing to optimize away all ompt
// code in this case
//
// Handles the untied-count check, destructor thunks, detachable/proxy
// conversion, target-async re-enqueue, dependence release, child-task
// accounting, and finally frees the task (and freeable ancestors) when it
// actually completed.
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams...
#if OMPX_TASKGRAPH
  // Cache the flag up front to avoid touching taskdata->td_flags after the
  // task may have been freed (seg fault with vanilla taskloop otherwise).
  bool is_taskgraph;
#endif
#if KMP_DEBUG
  kmp_int32 children = 0;
#endif
  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

#if OMPX_TASKGRAPH
  is_taskgraph = taskdata->is_taskgraph;
#endif

// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done, to be continued possibly by other thread, do
      // not free it now
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
        // task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  // bookkeeping for resuming task:
  // GEH - note tasking_ser => task_serial
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that resumed task is passed as argument
  }

  /* If the tasks' destructor thunk flag has been set, we need to invoke the
     destructor thunk that has been generated by the compiler. The code is
     placed here, since at this point other tasks might have been released
     hence overlapping the destructor invocations with some other work in the
     released tasks.  The OpenMP spec is not specific on when the destructors
     are invoked, so we should be free to choose. */
  if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // `completed` stays true only if the task neither detached nor needs
  // re-enqueueing for a pending target operation.
  bool completed = true;
  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // event hasn't been fulfilled yet. Try to detach task.
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        // task finished execution
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0; // suspend the finishing task

#if OMPT_SUPPORT
        // For a detached task, which is not completed, we switch back
        // the omp_fulfill_event signals completion
        // locking is necessary to avoid a race with ompt_task_late_fulfill
        if (ompt)
          __ompt_task_finish(task, resumed_task, ompt_task_detach);
#endif

        // no access to taskdata after this point!
        // __kmp_fulfill_event might free taskdata at any time from now

        taskdata->td_flags.proxy = TASK_PROXY; // proxify!
        completed = false;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
    }
  }

  // Tasks with valid target async handles must be re-enqueued.
  if (taskdata->td_target_data.async_handle != NULL) {
    // Note: no need to translate gtid to its shadow. If the current thread is a
    // hidden helper one, then the gtid is already correct. Otherwise, hidden
    // helper threads are disabled, and gtid refers to a OpenMP thread.
#if OMPT_SUPPORT
    if (ompt) {
      __ompt_task_finish(task, resumed_task, ompt_task_switch);
    }
#endif
    __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
    if (KMP_HIDDEN_HELPER_THREAD(gtid))
      __kmp_hidden_helper_worker_thread_signal();
    completed = false;
  }

  if (completed) {
    taskdata->td_flags.complete = 1; // mark the task as completed
#if OMPX_TASKGRAPH
    taskdata->td_flags.onced = 1; // mark the task as ran once already
#endif

#if OMPT_SUPPORT
    // This is not a detached task, we are done here
    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_complete);
#endif
    // TODO: What would be the balance between the conditions in the function
    // and an atomic operation?
    if (__kmp_track_children_task(taskdata)) {
      __kmp_release_deps(gtid, taskdata);
      // Predecrement simulated by "- 1" calculation
#if KMP_DEBUG
      children = -1 +
#endif
          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
      KMP_DEBUG_ASSERT(children >= 0);
#if OMPX_TASKGRAPH
      if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
#else
      if (taskdata->td_taskgroup)
#endif
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
                             task_team->tt.tt_hidden_helper_task_encountered)) {
      // if we found proxy or hidden helper tasks there could exist a dependency
      // chain with the proxy task as origin
      __kmp_release_deps(gtid, taskdata);
    }
    // td_flags.executing must be marked as 0 after __kmp_release_deps has been
    // called. Othertwise, if a task is executed immediately from the
    // release_deps code, the flag will be reset to 1 again by this same
    // function
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0; // suspend the finishing task

    // Decrement the counter of hidden helper tasks to be executed.
    if (taskdata->td_flags.hidden_helper) {
      // Hidden helper tasks can only be executed by hidden helper threads.
      KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
      KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Free this task and then ancestor tasks if they have no children.
  // Restore th_current_task first as suggested by John:
  // johnmc: if an asynchronous inquiry peers into the runtime system
  // it doesn't see the freed task as the current task.
  thread->th.th_current_task = resumed_task;
  if (completed)
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  // TODO: GEH - make sure root team implicit task is initialized properly.
  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
  resumed_task->td_flags.executing = 1; // resume previous task

#if OMPX_TASKGRAPH
  if (is_taskgraph && __kmp_track_children_task(taskdata) &&
      taskdata->td_taskgroup) {
    // TDG: we only release taskgroup barrier here because
    // free_task_and_ancestors will call
    // __kmp_free_task, which resets all task parameters such as
    // taskdata->started, etc. If we release the barrier earlier, these
    // parameters could be read before being reset. This is not an issue for
    // non-TDG implementation because we never reuse a task(data) structure
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
  }
#endif

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));

  return;
}
12520b57cec5SDimitry Andric 
// __kmpc_omp_task_complete_if0_template: common implementation for reporting
// completion of an undeferred ("if0") task. The ompt template parameter
// selects whether the OMPT frame-cleanup code is compiled in.
//
// loc_ref: source location information (used for tracing only here).
// gtid: global thread number.
// task: task thunk for the completed task.
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  KMP_DEBUG_ASSERT(gtid >= 0);
  // this routine will provide task to resume
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

#if OMPT_SUPPORT
  if (ompt) {
    // Clear the enter frame of the current task now that the if0 task body
    // has returned.
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags =
        ompt_frame_runtime | ompt_frame_framepointer;
  }
#endif

  return;
}
12780b57cec5SDimitry Andric 
#if OMPT_SUPPORT
// Out-of-line wrapper instantiating the template with OMPT enabled;
// OMPT_NOINLINE keeps the tool-support path out of the common caller.
OMPT_NOINLINE
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT
12860b57cec5SDimitry Andric 
// __kmpc_omp_task_complete_if0: report that a task has completed execution
//
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    // A tool is attached: take the OMPT-enabled instantiation.
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
#endif
  // Fast path: OMPT disabled (or not compiled in).
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
13020b57cec5SDimitry Andric 
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
// Compiled out by default; only built when TASK_UNUSED is defined.
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task,
                           NULL); // Not sure how to find task to resume

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
  return;
}
#endif // TASK_UNUSED
13190b57cec5SDimitry Andric 
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
// task for a given thread
//
// loc_ref:  reference to source location of parallel region
// this_thr:  thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to
// have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  //    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info
  //    in debugger)
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  // Implicit tasks are always tied, implicit, and non-proxy.
  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // An implicit task begins life already started and executing.
  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  task->td_flags.onced = 0;
#endif

  task->td_depnode = NULL;
  task->td_last_tied = task;
  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (set_curr_task) { // only do this init first time thread is created
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: don't need to deallocate implicit task
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    // Reinitialization path: counters must already be drained.
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);
#endif

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
13900b57cec5SDimitry Andric 
// __kmp_finish_implicit_task: Release resources associated to implicit tasks
// at the end of parallel regions. Some resources are kept for reuse in the next
// parallel region.
//
// thread:  thread data structure corresponding to implicit task
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
#if OMPX_TASKGRAPH
    task->td_flags.onced = 1;
#endif
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      // CAS on the full flags word: only the thread that flips `complete`
      // back to 0 performs the dephash-entry cleanup, preventing a
      // concurrent double-free (same pattern as
      // __kmp_free_task_and_ancestors).
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        // Entries are freed but the dephash itself is kept for reuse in the
        // next parallel region.
        __kmp_dephash_free_entries(thread, task->td_dephash);
      }
    }
  }
}
14200b57cec5SDimitry Andric 
14210b57cec5SDimitry Andric // __kmp_free_implicit_task: Release resources associated to implicit tasks
14220b57cec5SDimitry Andric // when these are destroyed regions
14230b57cec5SDimitry Andric //
14240b57cec5SDimitry Andric // thread:  thread data structure corresponding to implicit task
14250b57cec5SDimitry Andric void __kmp_free_implicit_task(kmp_info_t *thread) {
14260b57cec5SDimitry Andric   kmp_taskdata_t *task = thread->th.th_current_task;
14270b57cec5SDimitry Andric   if (task && task->td_dephash) {
14280b57cec5SDimitry Andric     __kmp_dephash_free(thread, task->td_dephash);
14290b57cec5SDimitry Andric     task->td_dephash = NULL;
14300b57cec5SDimitry Andric   }
14310b57cec5SDimitry Andric }
14320b57cec5SDimitry Andric 
14330b57cec5SDimitry Andric // Round up a size to a power of two specified by val: Used to insert padding
14340b57cec5SDimitry Andric // between structures co-allocated using a single malloc() call
14350b57cec5SDimitry Andric static size_t __kmp_round_up_to_val(size_t size, size_t val) {
14360b57cec5SDimitry Andric   if (size & (val - 1)) {
14370b57cec5SDimitry Andric     size &= ~(val - 1);
14380b57cec5SDimitry Andric     if (size <= KMP_SIZE_T_MAX - val) {
14390b57cec5SDimitry Andric       size += val; // Round up if there is no overflow.
14400b57cec5SDimitry Andric     }
14410b57cec5SDimitry Andric   }
14420b57cec5SDimitry Andric   return size;
14430b57cec5SDimitry Andric } // __kmp_round_up_to_va
14440b57cec5SDimitry Andric 
14450b57cec5SDimitry Andric // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
14460b57cec5SDimitry Andric //
14470b57cec5SDimitry Andric // loc_ref: source location information
14480b57cec5SDimitry Andric // gtid: global thread number.
14490b57cec5SDimitry Andric // flags: include tiedness & task type (explicit vs. implicit) of the ''new''
14500b57cec5SDimitry Andric // task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
14510b57cec5SDimitry Andric // sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including
14520b57cec5SDimitry Andric // private vars accessed in task.
14530b57cec5SDimitry Andric // sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed
14540b57cec5SDimitry Andric // in task.
14550b57cec5SDimitry Andric // task_entry: Pointer to task code entry point generated by compiler.
14560b57cec5SDimitry Andric // returns: a pointer to the allocated kmp_task_t structure (task).
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  // Tasking requires the middle layer of the runtime to be initialized.
  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      // Lazily bring up the hidden-helper thread machinery on first use.
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();
    } else {
      // If the hidden helper task is not enabled, reset the flag to FALSE.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  KMP_DEBUG_ASSERT(parent_task);
  // A task nested inside a final task is itself final.
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
      // NOTE(review): intentionally empty upstream branch; merged_if0 appears
      // to need no extra handling when the parent is final.
    }
    flags->final = 1;
  }

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // Untied task encountered causes the TSC algorithm to check entire deque of
    // the victim thread. If no untied task encountered, then checking the head
    // of the deque should be enough.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  // Detachable tasks are not proxy tasks yet but could be in the future. Doing
  // the tasking setup
  // when that happens is too late.
  if (UNLIKELY(flags->proxy == TASK_PROXY ||
               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
    if (flags->proxy == TASK_PROXY) {
      // Proxy tasks are forced untied and treated as merged-if0.
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    /* are we running in a sequential parallel or tskm_immediate_exec... we need
       tasking support enabled */
    if ((thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized
          setup a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    /* tasking must be enabled now as the task might not be pushed */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only owner can allocate
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    // Publish to the task team that proxy/hidden-helper tasks exist so that
    // other threads know to look for them.
    if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }

  // Calculate shared structure offset including padding after kmp_task_t struct
  // to align pointers in shared struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

  // Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */

  // The kmp_task_t lives in the same allocation, after the taskdata header.
  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_S390X || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // AC: Always start with 0 part id

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // avoid copying icvs for proxy tasks
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  // Record the (possibly adjusted) caller flags; tasktype is forced below.
  taskdata->td_flags = *flags;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;
  // If it is hidden helper task, we need to set the team and task team
  // correspondingly.
  if (flags->hidden_helper) {
    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;
  }

  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  // GEH - TODO: fix this to copy parent task's value of team_serial flag
  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // GEH - Note we serialize the task if the team is serialized to make sure
  // implicit parallel region tasks are not left until program termination to
  // execute. Also, it helps locality to execute immediately.

  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  taskdata->td_flags.onced = 0;
#endif
  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // start at one because counts current task and children
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  taskdata->td_target_data.async_handle = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);
#endif
  // TODO: What would be the balance between the conditions in the function and
  // an atomic operation?
  if (__kmp_track_children_task(taskdata)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      // Increment the number of hidden helper tasks to be executed
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

#if OMPX_TASKGRAPH
  // If a task graph (TDG) is being recorded, tag this task as part of it and
  // give it a TDG-scoped id (taskloop internal tasks are excluded).
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
      (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
    taskdata->is_taskgraph = 1;
    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
  }
#endif
  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));

  return task;
}
16840b57cec5SDimitry Andric 
16850b57cec5SDimitry Andric kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
16860b57cec5SDimitry Andric                                   kmp_int32 flags, size_t sizeof_kmp_task_t,
16870b57cec5SDimitry Andric                                   size_t sizeof_shareds,
16880b57cec5SDimitry Andric                                   kmp_routine_entry_t task_entry) {
16890b57cec5SDimitry Andric   kmp_task_t *retval;
16900b57cec5SDimitry Andric   kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1691e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
16920b57cec5SDimitry Andric   input_flags->native = FALSE;
16930b57cec5SDimitry Andric   // __kmp_task_alloc() sets up all other runtime flags
16940b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
16950b57cec5SDimitry Andric                 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
16960b57cec5SDimitry Andric                 gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
16970b57cec5SDimitry Andric                 input_flags->proxy ? "proxy" : "",
16980b57cec5SDimitry Andric                 input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
16990b57cec5SDimitry Andric                 sizeof_shareds, task_entry));
17000b57cec5SDimitry Andric 
17010b57cec5SDimitry Andric   retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
17020b57cec5SDimitry Andric                             sizeof_shareds, task_entry);
17030b57cec5SDimitry Andric 
17040b57cec5SDimitry Andric   KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
17050b57cec5SDimitry Andric 
17060b57cec5SDimitry Andric   return retval;
17070b57cec5SDimitry Andric }
17080b57cec5SDimitry Andric 
17090b57cec5SDimitry Andric kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
17100b57cec5SDimitry Andric                                          kmp_int32 flags,
17110b57cec5SDimitry Andric                                          size_t sizeof_kmp_task_t,
17120b57cec5SDimitry Andric                                          size_t sizeof_shareds,
17130b57cec5SDimitry Andric                                          kmp_routine_entry_t task_entry,
17140b57cec5SDimitry Andric                                          kmp_int64 device_id) {
1715e8d8bef9SDimitry Andric   auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
1716349cc55cSDimitry Andric   // target task is untied defined in the specification
17176e75b2fbSDimitry Andric   input_flags.tiedness = TASK_UNTIED;
1718*0fca6ea1SDimitry Andric   input_flags.target = 1;
1719349cc55cSDimitry Andric 
1720349cc55cSDimitry Andric   if (__kmp_enable_hidden_helper)
1721349cc55cSDimitry Andric     input_flags.hidden_helper = TRUE;
1722e8d8bef9SDimitry Andric 
17230b57cec5SDimitry Andric   return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
17240b57cec5SDimitry Andric                                sizeof_shareds, task_entry);
17250b57cec5SDimitry Andric }
17260b57cec5SDimitry Andric 
17270b57cec5SDimitry Andric /*!
17280b57cec5SDimitry Andric @ingroup TASKING
17290b57cec5SDimitry Andric @param loc_ref location of the original task directive
17300b57cec5SDimitry Andric @param gtid Global Thread ID of encountering thread
17310b57cec5SDimitry Andric @param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
17320b57cec5SDimitry Andric task''
17330b57cec5SDimitry Andric @param naffins Number of affinity items
17340b57cec5SDimitry Andric @param affin_list List of affinity items
17350b57cec5SDimitry Andric @return Returns non-zero if registering affinity information was not successful.
17360b57cec5SDimitry Andric  Returns 0 if registration was successful
17370b57cec5SDimitry Andric This entry registers the affinity information attached to a task with the task
17380b57cec5SDimitry Andric thunk structure kmp_taskdata_t.
17390b57cec5SDimitry Andric */
kmp_int32
__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, kmp_int32 naffins,
                                  kmp_task_affinity_info_t *affin_list) {
  // Stub implementation: the affinity list is accepted but not recorded; all
  // arguments are ignored and registration always reports success (0).
  return 0;
}
17460b57cec5SDimitry Andric 
17470b57cec5SDimitry Andric //  __kmp_invoke_task: invoke the specified task
17480b57cec5SDimitry Andric //
17490b57cec5SDimitry Andric // gtid: global thread ID of caller
17500b57cec5SDimitry Andric // task: the task to invoke
17515ffd83dbSDimitry Andric // current_task: the task to resume after task invocation
17525f757f3fSDimitry Andric #ifdef __s390x__
17535f757f3fSDimitry Andric __attribute__((target("backchain")))
17545f757f3fSDimitry Andric #endif
17555f757f3fSDimitry Andric static void
17565f757f3fSDimitry Andric __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
17570b57cec5SDimitry Andric                   kmp_taskdata_t *current_task) {
17580b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
17590b57cec5SDimitry Andric   kmp_info_t *thread;
17600b57cec5SDimitry Andric   int discard = 0 /* false */;
17610b57cec5SDimitry Andric   KA_TRACE(
17620b57cec5SDimitry Andric       30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
17630b57cec5SDimitry Andric            gtid, taskdata, current_task));
17640b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task);
1765e8d8bef9SDimitry Andric   if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
1766e8d8bef9SDimitry Andric                taskdata->td_flags.complete == 1)) {
17670b57cec5SDimitry Andric     // This is a proxy task that was already completed but it needs to run
17680b57cec5SDimitry Andric     // its bottom-half finish
17690b57cec5SDimitry Andric     KA_TRACE(
17700b57cec5SDimitry Andric         30,
17710b57cec5SDimitry Andric         ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
17720b57cec5SDimitry Andric          gtid, taskdata));
17730b57cec5SDimitry Andric 
17740b57cec5SDimitry Andric     __kmp_bottom_half_finish_proxy(gtid, task);
17750b57cec5SDimitry Andric 
17760b57cec5SDimitry Andric     KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
17770b57cec5SDimitry Andric                   "proxy task %p, resuming task %p\n",
17780b57cec5SDimitry Andric                   gtid, taskdata, current_task));
17790b57cec5SDimitry Andric 
17800b57cec5SDimitry Andric     return;
17810b57cec5SDimitry Andric   }
17820b57cec5SDimitry Andric 
17830b57cec5SDimitry Andric #if OMPT_SUPPORT
17840b57cec5SDimitry Andric   // For untied tasks, the first task executed only calls __kmpc_omp_task and
17850b57cec5SDimitry Andric   // does not execute code.
17860b57cec5SDimitry Andric   ompt_thread_info_t oldInfo;
17870b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled)) {
17880b57cec5SDimitry Andric     // Store the threads states and restore them after the task
17890b57cec5SDimitry Andric     thread = __kmp_threads[gtid];
17900b57cec5SDimitry Andric     oldInfo = thread->th.ompt_thread_info;
17910b57cec5SDimitry Andric     thread->th.ompt_thread_info.wait_id = 0;
17920b57cec5SDimitry Andric     thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
17930b57cec5SDimitry Andric                                             ? ompt_state_work_serial
17940b57cec5SDimitry Andric                                             : ompt_state_work_parallel;
17950b57cec5SDimitry Andric     taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
17960b57cec5SDimitry Andric   }
17970b57cec5SDimitry Andric #endif
17980b57cec5SDimitry Andric 
17990b57cec5SDimitry Andric   // Proxy tasks are not handled by the runtime
18000b57cec5SDimitry Andric   if (taskdata->td_flags.proxy != TASK_PROXY) {
18010b57cec5SDimitry Andric     __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
18020b57cec5SDimitry Andric   }
18030b57cec5SDimitry Andric 
18040b57cec5SDimitry Andric   // TODO: cancel tasks if the parallel region has also been cancelled
18050b57cec5SDimitry Andric   // TODO: check if this sequence can be hoisted above __kmp_task_start
18060b57cec5SDimitry Andric   // if cancellation has been enabled for this run ...
1807e8d8bef9SDimitry Andric   if (UNLIKELY(__kmp_omp_cancellation)) {
18080b57cec5SDimitry Andric     thread = __kmp_threads[gtid];
18090b57cec5SDimitry Andric     kmp_team_t *this_team = thread->th.th_team;
18100b57cec5SDimitry Andric     kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
18110b57cec5SDimitry Andric     if ((taskgroup && taskgroup->cancel_request) ||
18120b57cec5SDimitry Andric         (this_team->t.t_cancel_request == cancel_parallel)) {
18130b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
18140b57cec5SDimitry Andric       ompt_data_t *task_data;
18150b57cec5SDimitry Andric       if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
18160b57cec5SDimitry Andric         __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
18170b57cec5SDimitry Andric         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
18180b57cec5SDimitry Andric             task_data,
18190b57cec5SDimitry Andric             ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
18200b57cec5SDimitry Andric                                                       : ompt_cancel_parallel) |
18210b57cec5SDimitry Andric                 ompt_cancel_discarded_task,
18220b57cec5SDimitry Andric             NULL);
18230b57cec5SDimitry Andric       }
18240b57cec5SDimitry Andric #endif
18250b57cec5SDimitry Andric       KMP_COUNT_BLOCK(TASK_cancelled);
18260b57cec5SDimitry Andric       // this task belongs to a task group and we need to cancel it
18270b57cec5SDimitry Andric       discard = 1 /* true */;
18280b57cec5SDimitry Andric     }
18290b57cec5SDimitry Andric   }
18300b57cec5SDimitry Andric 
18310b57cec5SDimitry Andric   // Invoke the task routine and pass in relevant data.
18320b57cec5SDimitry Andric   // Thunks generated by gcc take a different argument list.
18330b57cec5SDimitry Andric   if (!discard) {
18340b57cec5SDimitry Andric     if (taskdata->td_flags.tiedness == TASK_UNTIED) {
18350b57cec5SDimitry Andric       taskdata->td_last_tied = current_task->td_last_tied;
18360b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(taskdata->td_last_tied);
18370b57cec5SDimitry Andric     }
18380b57cec5SDimitry Andric #if KMP_STATS_ENABLED
18390b57cec5SDimitry Andric     KMP_COUNT_BLOCK(TASK_executed);
18400b57cec5SDimitry Andric     switch (KMP_GET_THREAD_STATE()) {
18410b57cec5SDimitry Andric     case FORK_JOIN_BARRIER:
18420b57cec5SDimitry Andric       KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
18430b57cec5SDimitry Andric       break;
18440b57cec5SDimitry Andric     case PLAIN_BARRIER:
18450b57cec5SDimitry Andric       KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
18460b57cec5SDimitry Andric       break;
18470b57cec5SDimitry Andric     case TASKYIELD:
18480b57cec5SDimitry Andric       KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
18490b57cec5SDimitry Andric       break;
18500b57cec5SDimitry Andric     case TASKWAIT:
18510b57cec5SDimitry Andric       KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
18520b57cec5SDimitry Andric       break;
18530b57cec5SDimitry Andric     case TASKGROUP:
18540b57cec5SDimitry Andric       KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
18550b57cec5SDimitry Andric       break;
18560b57cec5SDimitry Andric     default:
18570b57cec5SDimitry Andric       KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
18580b57cec5SDimitry Andric       break;
18590b57cec5SDimitry Andric     }
18600b57cec5SDimitry Andric #endif // KMP_STATS_ENABLED
18610b57cec5SDimitry Andric 
18620b57cec5SDimitry Andric // OMPT task begin
18630b57cec5SDimitry Andric #if OMPT_SUPPORT
18640b57cec5SDimitry Andric     if (UNLIKELY(ompt_enabled.enabled))
18650b57cec5SDimitry Andric       __ompt_task_start(task, current_task, gtid);
18660b57cec5SDimitry Andric #endif
186781ad6265SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
186881ad6265SDimitry Andric     if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
186981ad6265SDimitry Andric                  taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
187081ad6265SDimitry Andric       ompt_data_t instance = ompt_data_none;
187181ad6265SDimitry Andric       instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
187281ad6265SDimitry Andric       ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
187381ad6265SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
187481ad6265SDimitry Andric           &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
187581ad6265SDimitry Andric           ompt_dispatch_taskloop_chunk, instance);
187681ad6265SDimitry Andric       taskdata->ompt_task_info.dispatch_chunk = {0, 0};
187781ad6265SDimitry Andric     }
187881ad6265SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL
18790b57cec5SDimitry Andric 
1880fe6060f1SDimitry Andric #if OMPD_SUPPORT
1881fe6060f1SDimitry Andric     if (ompd_state & OMPD_ENABLE_BP)
1882fe6060f1SDimitry Andric       ompd_bp_task_begin();
1883fe6060f1SDimitry Andric #endif
1884fe6060f1SDimitry Andric 
18850b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
18860b57cec5SDimitry Andric     kmp_uint64 cur_time;
18870b57cec5SDimitry Andric     kmp_int32 kmp_itt_count_task =
18880b57cec5SDimitry Andric         __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
18890b57cec5SDimitry Andric         current_task->td_flags.tasktype == TASK_IMPLICIT;
18900b57cec5SDimitry Andric     if (kmp_itt_count_task) {
18910b57cec5SDimitry Andric       thread = __kmp_threads[gtid];
18920b57cec5SDimitry Andric       // Time outer level explicit task on barrier for adjusting imbalance time
18930b57cec5SDimitry Andric       if (thread->th.th_bar_arrive_time)
18940b57cec5SDimitry Andric         cur_time = __itt_get_timestamp();
18950b57cec5SDimitry Andric       else
18960b57cec5SDimitry Andric         kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
18970b57cec5SDimitry Andric     }
1898e8d8bef9SDimitry Andric     KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
18990b57cec5SDimitry Andric #endif
19000b57cec5SDimitry Andric 
1901bdd1243dSDimitry Andric #if ENABLE_LIBOMPTARGET
1902bdd1243dSDimitry Andric     if (taskdata->td_target_data.async_handle != NULL) {
1903bdd1243dSDimitry Andric       // If we have a valid target async handle, that means that we have already
1904bdd1243dSDimitry Andric       // executed the task routine once. We must query for the handle completion
1905bdd1243dSDimitry Andric       // instead of re-executing the routine.
190606c3fb27SDimitry Andric       KMP_ASSERT(tgt_target_nowait_query);
190706c3fb27SDimitry Andric       tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
1908bdd1243dSDimitry Andric     } else
1909bdd1243dSDimitry Andric #endif
1910349cc55cSDimitry Andric     if (task->routine != NULL) {
19110b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT
19120b57cec5SDimitry Andric       if (taskdata->td_flags.native) {
19130b57cec5SDimitry Andric         ((void (*)(void *))(*(task->routine)))(task->shareds);
19140b57cec5SDimitry Andric       } else
19150b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */
19160b57cec5SDimitry Andric       {
19170b57cec5SDimitry Andric         (*(task->routine))(gtid, task);
19180b57cec5SDimitry Andric       }
1919349cc55cSDimitry Andric     }
19200b57cec5SDimitry Andric     KMP_POP_PARTITIONED_TIMER();
19210b57cec5SDimitry Andric 
19220b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
19230b57cec5SDimitry Andric     if (kmp_itt_count_task) {
19240b57cec5SDimitry Andric       // Barrier imbalance - adjust arrive time with the task duration
19250b57cec5SDimitry Andric       thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
19260b57cec5SDimitry Andric     }
1927e8d8bef9SDimitry Andric     KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
1928e8d8bef9SDimitry Andric     KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
19290b57cec5SDimitry Andric #endif
19300b57cec5SDimitry Andric   }
19310b57cec5SDimitry Andric 
1932fe6060f1SDimitry Andric #if OMPD_SUPPORT
1933fe6060f1SDimitry Andric   if (ompd_state & OMPD_ENABLE_BP)
1934fe6060f1SDimitry Andric     ompd_bp_task_end();
1935fe6060f1SDimitry Andric #endif
1936fe6060f1SDimitry Andric 
19370b57cec5SDimitry Andric   // Proxy tasks are not handled by the runtime
19380b57cec5SDimitry Andric   if (taskdata->td_flags.proxy != TASK_PROXY) {
19390b57cec5SDimitry Andric #if OMPT_SUPPORT
19400b57cec5SDimitry Andric     if (UNLIKELY(ompt_enabled.enabled)) {
19410b57cec5SDimitry Andric       thread->th.ompt_thread_info = oldInfo;
19420b57cec5SDimitry Andric       if (taskdata->td_flags.tiedness == TASK_TIED) {
19430b57cec5SDimitry Andric         taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
19440b57cec5SDimitry Andric       }
19450b57cec5SDimitry Andric       __kmp_task_finish<true>(gtid, task, current_task);
19460b57cec5SDimitry Andric     } else
19470b57cec5SDimitry Andric #endif
19480b57cec5SDimitry Andric       __kmp_task_finish<false>(gtid, task, current_task);
19490b57cec5SDimitry Andric   }
1950*0fca6ea1SDimitry Andric #if OMPT_SUPPORT
1951*0fca6ea1SDimitry Andric   else if (UNLIKELY(ompt_enabled.enabled && taskdata->td_flags.target)) {
1952*0fca6ea1SDimitry Andric     __ompt_task_finish(task, current_task, ompt_task_switch);
1953*0fca6ea1SDimitry Andric   }
1954*0fca6ea1SDimitry Andric #endif
19550b57cec5SDimitry Andric 
19560b57cec5SDimitry Andric   KA_TRACE(
19570b57cec5SDimitry Andric       30,
19580b57cec5SDimitry Andric       ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
19590b57cec5SDimitry Andric        gtid, taskdata, current_task));
19600b57cec5SDimitry Andric   return;
19610b57cec5SDimitry Andric }
19620b57cec5SDimitry Andric 
19630b57cec5SDimitry Andric // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
19640b57cec5SDimitry Andric //
19650b57cec5SDimitry Andric // loc_ref: location of original task pragma (ignored)
19660b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread
19670b57cec5SDimitry Andric // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
19680b57cec5SDimitry Andric // Returns:
19690b57cec5SDimitry Andric //    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
19700b57cec5SDimitry Andric //    be resumed later.
19710b57cec5SDimitry Andric //    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
19720b57cec5SDimitry Andric //    resumed later.
19730b57cec5SDimitry Andric kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
19740b57cec5SDimitry Andric                                 kmp_task_t *new_task) {
19750b57cec5SDimitry Andric   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
19760b57cec5SDimitry Andric 
19770b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
19780b57cec5SDimitry Andric                 loc_ref, new_taskdata));
19790b57cec5SDimitry Andric 
19800b57cec5SDimitry Andric #if OMPT_SUPPORT
19810b57cec5SDimitry Andric   kmp_taskdata_t *parent;
19820b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled)) {
19830b57cec5SDimitry Andric     parent = new_taskdata->td_parent;
19840b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_task_create) {
19850b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1986fe6060f1SDimitry Andric           &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1987*0fca6ea1SDimitry Andric           &(new_taskdata->ompt_task_info.task_data),
1988*0fca6ea1SDimitry Andric           TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
19890b57cec5SDimitry Andric           OMPT_GET_RETURN_ADDRESS(0));
19900b57cec5SDimitry Andric     }
19910b57cec5SDimitry Andric   }
19920b57cec5SDimitry Andric #endif
19930b57cec5SDimitry Andric 
19940b57cec5SDimitry Andric   /* Should we execute the new task or queue it? For now, let's just always try
19950b57cec5SDimitry Andric      to queue it.  If the queue fills up, then we'll execute it.  */
19960b57cec5SDimitry Andric 
19970b57cec5SDimitry Andric   if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
19980b57cec5SDimitry Andric   { // Execute this task immediately
19990b57cec5SDimitry Andric     kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
20000b57cec5SDimitry Andric     new_taskdata->td_flags.task_serial = 1;
20010b57cec5SDimitry Andric     __kmp_invoke_task(gtid, new_task, current_task);
20020b57cec5SDimitry Andric   }
20030b57cec5SDimitry Andric 
20040b57cec5SDimitry Andric   KA_TRACE(
20050b57cec5SDimitry Andric       10,
20060b57cec5SDimitry Andric       ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
20070b57cec5SDimitry Andric        "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
20080b57cec5SDimitry Andric        gtid, loc_ref, new_taskdata));
20090b57cec5SDimitry Andric 
20100b57cec5SDimitry Andric #if OMPT_SUPPORT
20110b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled)) {
20120b57cec5SDimitry Andric     parent->ompt_task_info.frame.enter_frame = ompt_data_none;
20130b57cec5SDimitry Andric   }
20140b57cec5SDimitry Andric #endif
20150b57cec5SDimitry Andric   return TASK_CURRENT_NOT_QUEUED;
20160b57cec5SDimitry Andric }
20170b57cec5SDimitry Andric 
20180b57cec5SDimitry Andric // __kmp_omp_task: Schedule a non-thread-switchable task for execution
20190b57cec5SDimitry Andric //
20200b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread
20210b57cec5SDimitry Andric // new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
20220b57cec5SDimitry Andric // serialize_immediate: if TRUE then if the task is executed immediately its
20230b57cec5SDimitry Andric // execution will be serialized
20240b57cec5SDimitry Andric // Returns:
20250b57cec5SDimitry Andric //    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
20260b57cec5SDimitry Andric //    be resumed later.
20270b57cec5SDimitry Andric //    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
20280b57cec5SDimitry Andric //    resumed later.
20290b57cec5SDimitry Andric kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
20300b57cec5SDimitry Andric                          bool serialize_immediate) {
20310b57cec5SDimitry Andric   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
20320b57cec5SDimitry Andric 
203306c3fb27SDimitry Andric #if OMPX_TASKGRAPH
203406c3fb27SDimitry Andric   if (new_taskdata->is_taskgraph &&
203506c3fb27SDimitry Andric       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
203606c3fb27SDimitry Andric     kmp_tdg_info_t *tdg = new_taskdata->tdg;
203706c3fb27SDimitry Andric     // extend the record_map if needed
203806c3fb27SDimitry Andric     if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) {
203906c3fb27SDimitry Andric       __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
204006c3fb27SDimitry Andric       // map_size could have been updated by another thread if recursive
204106c3fb27SDimitry Andric       // taskloop
204206c3fb27SDimitry Andric       if (new_taskdata->td_task_id >= tdg->map_size) {
204306c3fb27SDimitry Andric         kmp_uint old_size = tdg->map_size;
204406c3fb27SDimitry Andric         kmp_uint new_size = old_size * 2;
204506c3fb27SDimitry Andric         kmp_node_info_t *old_record = tdg->record_map;
204606c3fb27SDimitry Andric         kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
204706c3fb27SDimitry Andric             new_size * sizeof(kmp_node_info_t));
204806c3fb27SDimitry Andric 
204906c3fb27SDimitry Andric         KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
205006c3fb27SDimitry Andric         tdg->record_map = new_record;
205106c3fb27SDimitry Andric 
205206c3fb27SDimitry Andric         __kmp_free(old_record);
205306c3fb27SDimitry Andric 
205406c3fb27SDimitry Andric         for (kmp_int i = old_size; i < new_size; i++) {
205506c3fb27SDimitry Andric           kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
205606c3fb27SDimitry Andric               __kmp_successors_size * sizeof(kmp_int32));
205706c3fb27SDimitry Andric           new_record[i].task = nullptr;
205806c3fb27SDimitry Andric           new_record[i].successors = successorsList;
205906c3fb27SDimitry Andric           new_record[i].nsuccessors = 0;
206006c3fb27SDimitry Andric           new_record[i].npredecessors = 0;
206106c3fb27SDimitry Andric           new_record[i].successors_size = __kmp_successors_size;
206206c3fb27SDimitry Andric           KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
206306c3fb27SDimitry Andric         }
206406c3fb27SDimitry Andric         // update the size at the end, so that we avoid other
206506c3fb27SDimitry Andric         // threads use old_record while map_size is already updated
206606c3fb27SDimitry Andric         tdg->map_size = new_size;
206706c3fb27SDimitry Andric       }
206806c3fb27SDimitry Andric       __kmp_release_bootstrap_lock(&tdg->graph_lock);
206906c3fb27SDimitry Andric     }
207006c3fb27SDimitry Andric     // record a task
207106c3fb27SDimitry Andric     if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) {
207206c3fb27SDimitry Andric       tdg->record_map[new_taskdata->td_task_id].task = new_task;
207306c3fb27SDimitry Andric       tdg->record_map[new_taskdata->td_task_id].parent_task =
207406c3fb27SDimitry Andric           new_taskdata->td_parent;
207506c3fb27SDimitry Andric       KMP_ATOMIC_INC(&tdg->num_tasks);
207606c3fb27SDimitry Andric     }
207706c3fb27SDimitry Andric   }
207806c3fb27SDimitry Andric #endif
207906c3fb27SDimitry Andric 
20800b57cec5SDimitry Andric   /* Should we execute the new task or queue it? For now, let's just always try
20810b57cec5SDimitry Andric      to queue it.  If the queue fills up, then we'll execute it.  */
20820b57cec5SDimitry Andric   if (new_taskdata->td_flags.proxy == TASK_PROXY ||
20830b57cec5SDimitry Andric       __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
20840b57cec5SDimitry Andric   { // Execute this task immediately
20850b57cec5SDimitry Andric     kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
20860b57cec5SDimitry Andric     if (serialize_immediate)
20870b57cec5SDimitry Andric       new_taskdata->td_flags.task_serial = 1;
20880b57cec5SDimitry Andric     __kmp_invoke_task(gtid, new_task, current_task);
208981ad6265SDimitry Andric   } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
209081ad6265SDimitry Andric              __kmp_wpolicy_passive) {
209181ad6265SDimitry Andric     kmp_info_t *this_thr = __kmp_threads[gtid];
209281ad6265SDimitry Andric     kmp_team_t *team = this_thr->th.th_team;
209381ad6265SDimitry Andric     kmp_int32 nthreads = this_thr->th.th_team_nproc;
209481ad6265SDimitry Andric     for (int i = 0; i < nthreads; ++i) {
209581ad6265SDimitry Andric       kmp_info_t *thread = team->t.t_threads[i];
209681ad6265SDimitry Andric       if (thread == this_thr)
209781ad6265SDimitry Andric         continue;
209881ad6265SDimitry Andric       if (thread->th.th_sleep_loc != NULL) {
209981ad6265SDimitry Andric         __kmp_null_resume_wrapper(thread);
210081ad6265SDimitry Andric         break; // awake one thread at a time
21010b57cec5SDimitry Andric       }
210281ad6265SDimitry Andric     }
210381ad6265SDimitry Andric   }
21040b57cec5SDimitry Andric   return TASK_CURRENT_NOT_QUEUED;
21050b57cec5SDimitry Andric }
21060b57cec5SDimitry Andric 
21070b57cec5SDimitry Andric // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
21080b57cec5SDimitry Andric // non-thread-switchable task from the parent thread only!
21090b57cec5SDimitry Andric //
21100b57cec5SDimitry Andric // loc_ref: location of original task pragma (ignored)
21110b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread
21120b57cec5SDimitry Andric // new_task: non-thread-switchable task thunk allocated by
21130b57cec5SDimitry Andric // __kmp_omp_task_alloc()
21140b57cec5SDimitry Andric // Returns:
21150b57cec5SDimitry Andric //    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
21160b57cec5SDimitry Andric //    be resumed later.
21170b57cec5SDimitry Andric //    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
21180b57cec5SDimitry Andric //    resumed later.
21190b57cec5SDimitry Andric kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
21200b57cec5SDimitry Andric                           kmp_task_t *new_task) {
21210b57cec5SDimitry Andric   kmp_int32 res;
21220b57cec5SDimitry Andric   KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
21230b57cec5SDimitry Andric 
21240b57cec5SDimitry Andric #if KMP_DEBUG || OMPT_SUPPORT
21250b57cec5SDimitry Andric   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
21260b57cec5SDimitry Andric #endif
21270b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
21280b57cec5SDimitry Andric                 new_taskdata));
2129e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
21300b57cec5SDimitry Andric 
21310b57cec5SDimitry Andric #if OMPT_SUPPORT
21320b57cec5SDimitry Andric   kmp_taskdata_t *parent = NULL;
21330b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled)) {
21340b57cec5SDimitry Andric     if (!new_taskdata->td_flags.started) {
21350b57cec5SDimitry Andric       OMPT_STORE_RETURN_ADDRESS(gtid);
21360b57cec5SDimitry Andric       parent = new_taskdata->td_parent;
21370b57cec5SDimitry Andric       if (!parent->ompt_task_info.frame.enter_frame.ptr) {
2138fe6060f1SDimitry Andric         parent->ompt_task_info.frame.enter_frame.ptr =
2139fe6060f1SDimitry Andric             OMPT_GET_FRAME_ADDRESS(0);
21400b57cec5SDimitry Andric       }
21410b57cec5SDimitry Andric       if (ompt_enabled.ompt_callback_task_create) {
21420b57cec5SDimitry Andric         ompt_callbacks.ompt_callback(ompt_callback_task_create)(
2143fe6060f1SDimitry Andric             &(parent->ompt_task_info.task_data),
2144fe6060f1SDimitry Andric             &(parent->ompt_task_info.frame),
21450b57cec5SDimitry Andric             &(new_taskdata->ompt_task_info.task_data),
2146*0fca6ea1SDimitry Andric             TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
21470b57cec5SDimitry Andric             OMPT_LOAD_RETURN_ADDRESS(gtid));
21480b57cec5SDimitry Andric       }
21490b57cec5SDimitry Andric     } else {
21500b57cec5SDimitry Andric       // We are scheduling the continuation of an UNTIED task.
21510b57cec5SDimitry Andric       // Scheduling back to the parent task.
21520b57cec5SDimitry Andric       __ompt_task_finish(new_task,
21530b57cec5SDimitry Andric                          new_taskdata->ompt_task_info.scheduling_parent,
21540b57cec5SDimitry Andric                          ompt_task_switch);
21550b57cec5SDimitry Andric       new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
21560b57cec5SDimitry Andric     }
21570b57cec5SDimitry Andric   }
21580b57cec5SDimitry Andric #endif
21590b57cec5SDimitry Andric 
21600b57cec5SDimitry Andric   res = __kmp_omp_task(gtid, new_task, true);
21610b57cec5SDimitry Andric 
21620b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
21630b57cec5SDimitry Andric                 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
21640b57cec5SDimitry Andric                 gtid, loc_ref, new_taskdata));
21650b57cec5SDimitry Andric #if OMPT_SUPPORT
21660b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
21670b57cec5SDimitry Andric     parent->ompt_task_info.frame.enter_frame = ompt_data_none;
21680b57cec5SDimitry Andric   }
21690b57cec5SDimitry Andric #endif
21700b57cec5SDimitry Andric   return res;
21710b57cec5SDimitry Andric }
21720b57cec5SDimitry Andric 
21730b57cec5SDimitry Andric // __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
21740b57cec5SDimitry Andric // a taskloop task with the correct OMPT return address
21750b57cec5SDimitry Andric //
21760b57cec5SDimitry Andric // loc_ref: location of original task pragma (ignored)
21770b57cec5SDimitry Andric // gtid: Global Thread ID of encountering thread
21780b57cec5SDimitry Andric // new_task: non-thread-switchable task thunk allocated by
21790b57cec5SDimitry Andric // __kmp_omp_task_alloc()
21800b57cec5SDimitry Andric // codeptr_ra: return address for OMPT callback
21810b57cec5SDimitry Andric // Returns:
21820b57cec5SDimitry Andric //    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
21830b57cec5SDimitry Andric //    be resumed later.
21840b57cec5SDimitry Andric //    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
21850b57cec5SDimitry Andric //    resumed later.
21860b57cec5SDimitry Andric kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
21870b57cec5SDimitry Andric                                   kmp_task_t *new_task, void *codeptr_ra) {
21880b57cec5SDimitry Andric   kmp_int32 res;
21890b57cec5SDimitry Andric   KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
21900b57cec5SDimitry Andric 
21910b57cec5SDimitry Andric #if KMP_DEBUG || OMPT_SUPPORT
21920b57cec5SDimitry Andric   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
21930b57cec5SDimitry Andric #endif
21940b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
21950b57cec5SDimitry Andric                 new_taskdata));
21960b57cec5SDimitry Andric 
21970b57cec5SDimitry Andric #if OMPT_SUPPORT
21980b57cec5SDimitry Andric   kmp_taskdata_t *parent = NULL;
21990b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
22000b57cec5SDimitry Andric     parent = new_taskdata->td_parent;
22010b57cec5SDimitry Andric     if (!parent->ompt_task_info.frame.enter_frame.ptr)
22020b57cec5SDimitry Andric       parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
22030b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_task_create) {
22040b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
2205fe6060f1SDimitry Andric           &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
22060b57cec5SDimitry Andric           &(new_taskdata->ompt_task_info.task_data),
2207*0fca6ea1SDimitry Andric           TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, codeptr_ra);
22080b57cec5SDimitry Andric     }
22090b57cec5SDimitry Andric   }
22100b57cec5SDimitry Andric #endif
22110b57cec5SDimitry Andric 
22120b57cec5SDimitry Andric   res = __kmp_omp_task(gtid, new_task, true);
22130b57cec5SDimitry Andric 
22140b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
22150b57cec5SDimitry Andric                 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
22160b57cec5SDimitry Andric                 gtid, loc_ref, new_taskdata));
22170b57cec5SDimitry Andric #if OMPT_SUPPORT
22180b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
22190b57cec5SDimitry Andric     parent->ompt_task_info.frame.enter_frame = ompt_data_none;
22200b57cec5SDimitry Andric   }
22210b57cec5SDimitry Andric #endif
22220b57cec5SDimitry Andric   return res;
22230b57cec5SDimitry Andric }
22240b57cec5SDimitry Andric 
22250b57cec5SDimitry Andric template <bool ompt>
22260b57cec5SDimitry Andric static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
22270b57cec5SDimitry Andric                                               void *frame_address,
22280b57cec5SDimitry Andric                                               void *return_address) {
2229fe6060f1SDimitry Andric   kmp_taskdata_t *taskdata = nullptr;
22300b57cec5SDimitry Andric   kmp_info_t *thread;
22310b57cec5SDimitry Andric   int thread_finished = FALSE;
22320b57cec5SDimitry Andric   KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
22330b57cec5SDimitry Andric 
22340b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
2235fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
22360b57cec5SDimitry Andric 
22370b57cec5SDimitry Andric   if (__kmp_tasking_mode != tskm_immediate_exec) {
22380b57cec5SDimitry Andric     thread = __kmp_threads[gtid];
22390b57cec5SDimitry Andric     taskdata = thread->th.th_current_task;
22400b57cec5SDimitry Andric 
22410b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
22420b57cec5SDimitry Andric     ompt_data_t *my_task_data;
22430b57cec5SDimitry Andric     ompt_data_t *my_parallel_data;
22440b57cec5SDimitry Andric 
22450b57cec5SDimitry Andric     if (ompt) {
22460b57cec5SDimitry Andric       my_task_data = &(taskdata->ompt_task_info.task_data);
22470b57cec5SDimitry Andric       my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
22480b57cec5SDimitry Andric 
22490b57cec5SDimitry Andric       taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
22500b57cec5SDimitry Andric 
22510b57cec5SDimitry Andric       if (ompt_enabled.ompt_callback_sync_region) {
22520b57cec5SDimitry Andric         ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
22530b57cec5SDimitry Andric             ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
22540b57cec5SDimitry Andric             my_task_data, return_address);
22550b57cec5SDimitry Andric       }
22560b57cec5SDimitry Andric 
22570b57cec5SDimitry Andric       if (ompt_enabled.ompt_callback_sync_region_wait) {
22580b57cec5SDimitry Andric         ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
22590b57cec5SDimitry Andric             ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
22600b57cec5SDimitry Andric             my_task_data, return_address);
22610b57cec5SDimitry Andric       }
22620b57cec5SDimitry Andric     }
22630b57cec5SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL
22640b57cec5SDimitry Andric 
22650b57cec5SDimitry Andric // Debugger: The taskwait is active. Store location and thread encountered the
22660b57cec5SDimitry Andric // taskwait.
22670b57cec5SDimitry Andric #if USE_ITT_BUILD
22680b57cec5SDimitry Andric // Note: These values are used by ITT events as well.
22690b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
22700b57cec5SDimitry Andric     taskdata->td_taskwait_counter += 1;
22710b57cec5SDimitry Andric     taskdata->td_taskwait_ident = loc_ref;
22720b57cec5SDimitry Andric     taskdata->td_taskwait_thread = gtid + 1;
22730b57cec5SDimitry Andric 
22740b57cec5SDimitry Andric #if USE_ITT_BUILD
2275fe6060f1SDimitry Andric     void *itt_sync_obj = NULL;
2276fe6060f1SDimitry Andric #if USE_ITT_NOTIFY
2277fe6060f1SDimitry Andric     KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2278fe6060f1SDimitry Andric #endif /* USE_ITT_NOTIFY */
22790b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
22800b57cec5SDimitry Andric 
22810b57cec5SDimitry Andric     bool must_wait =
22820b57cec5SDimitry Andric         !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
22830b57cec5SDimitry Andric 
22840b57cec5SDimitry Andric     must_wait = must_wait || (thread->th.th_task_team != NULL &&
22850b57cec5SDimitry Andric                               thread->th.th_task_team->tt.tt_found_proxy_tasks);
2286e8d8bef9SDimitry Andric     // If hidden helper thread is encountered, we must enable wait here.
2287e8d8bef9SDimitry Andric     must_wait =
2288e8d8bef9SDimitry Andric         must_wait ||
2289e8d8bef9SDimitry Andric         (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
2290e8d8bef9SDimitry Andric          thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
2291e8d8bef9SDimitry Andric 
22920b57cec5SDimitry Andric     if (must_wait) {
2293e8d8bef9SDimitry Andric       kmp_flag_32<false, false> flag(
2294e8d8bef9SDimitry Andric           RCAST(std::atomic<kmp_uint32> *,
22950b57cec5SDimitry Andric                 &(taskdata->td_incomplete_child_tasks)),
22960b57cec5SDimitry Andric           0U);
22970b57cec5SDimitry Andric       while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
22980b57cec5SDimitry Andric         flag.execute_tasks(thread, gtid, FALSE,
22990b57cec5SDimitry Andric                            &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
23000b57cec5SDimitry Andric                            __kmp_task_stealing_constraint);
23010b57cec5SDimitry Andric       }
23020b57cec5SDimitry Andric     }
23030b57cec5SDimitry Andric #if USE_ITT_BUILD
2304fe6060f1SDimitry Andric     KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2305e8d8bef9SDimitry Andric     KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children
23060b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
23070b57cec5SDimitry Andric 
23080b57cec5SDimitry Andric     // Debugger:  The taskwait is completed. Location remains, but thread is
23090b57cec5SDimitry Andric     // negated.
23100b57cec5SDimitry Andric     taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
23110b57cec5SDimitry Andric 
23120b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
23130b57cec5SDimitry Andric     if (ompt) {
23140b57cec5SDimitry Andric       if (ompt_enabled.ompt_callback_sync_region_wait) {
23150b57cec5SDimitry Andric         ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
23160b57cec5SDimitry Andric             ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
23170b57cec5SDimitry Andric             my_task_data, return_address);
23180b57cec5SDimitry Andric       }
23190b57cec5SDimitry Andric       if (ompt_enabled.ompt_callback_sync_region) {
23200b57cec5SDimitry Andric         ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
23210b57cec5SDimitry Andric             ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
23220b57cec5SDimitry Andric             my_task_data, return_address);
23230b57cec5SDimitry Andric       }
23240b57cec5SDimitry Andric       taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
23250b57cec5SDimitry Andric     }
23260b57cec5SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL
23270b57cec5SDimitry Andric   }
23280b57cec5SDimitry Andric 
23290b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
23300b57cec5SDimitry Andric                 "returning TASK_CURRENT_NOT_QUEUED\n",
23310b57cec5SDimitry Andric                 gtid, taskdata));
23320b57cec5SDimitry Andric 
23330b57cec5SDimitry Andric   return TASK_CURRENT_NOT_QUEUED;
23340b57cec5SDimitry Andric }
23350b57cec5SDimitry Andric 
23360b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
23370b57cec5SDimitry Andric OMPT_NOINLINE
23380b57cec5SDimitry Andric static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
23390b57cec5SDimitry Andric                                           void *frame_address,
23400b57cec5SDimitry Andric                                           void *return_address) {
23410b57cec5SDimitry Andric   return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
23420b57cec5SDimitry Andric                                             return_address);
23430b57cec5SDimitry Andric }
23440b57cec5SDimitry Andric #endif // OMPT_SUPPORT && OMPT_OPTIONAL
23450b57cec5SDimitry Andric 
23460b57cec5SDimitry Andric // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
23470b57cec5SDimitry Andric // complete
23480b57cec5SDimitry Andric kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
23490b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
23500b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled)) {
23510b57cec5SDimitry Andric     OMPT_STORE_RETURN_ADDRESS(gtid);
23520b57cec5SDimitry Andric     return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
23530b57cec5SDimitry Andric                                     OMPT_LOAD_RETURN_ADDRESS(gtid));
23540b57cec5SDimitry Andric   }
23550b57cec5SDimitry Andric #endif
23560b57cec5SDimitry Andric   return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
23570b57cec5SDimitry Andric }
23580b57cec5SDimitry Andric 
// __kmpc_omp_taskyield: switch to a different task
// Implements the OpenMP taskyield construct. If tasking is active, the
// encountering thread tries to execute other queued tasks before resuming.
// loc_ref  -- source location of the taskyield directive
// gtid     -- global thread id of the encountering thread
// end_part -- compiler-supplied hint; only traced here, otherwise unused
// Always returns TASK_CURRENT_NOT_QUEUED: the current task is never
// re-queued by a taskyield.
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_COUNT_BLOCK(OMP_TASKYIELD);
  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
  __kmp_assert_valid_gtid(gtid);

  // Nothing to yield to when tasks execute immediately or no parallel
  // region has been initialized yet.
  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
// Should we model this as a task wait or not?
// Debugger: The taskwait is active. Store location and thread encountered the
// taskwait.
#if USE_ITT_BUILD
// Note: These values are used by ITT events as well.
#endif /* USE_ITT_BUILD */
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    // +1 so that thread 0 is distinguishable from "no thread" (0).
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif /* USE_ITT_NOTIFY */
#endif /* USE_ITT_BUILD */
    // A serialized team has no task queues to steal from.
    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
#if OMPT_SUPPORT
          // Tell the OMPT layer that task scheduling below is due to a yield.
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
#endif
          // Execute other available tasks; final_spin=FALSE means return as
          // soon as no task is immediately available.
          __kmp_execute_tasks_32(
              thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
#if OMPT_SUPPORT
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
#endif
        }
      }
    }
#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
#endif /* USE_ITT_BUILD */

    // Debugger:  The taskwait is completed. Location remains, but thread is
    // negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
24250b57cec5SDimitry Andric 
24260b57cec5SDimitry Andric // Task Reduction implementation
24270b57cec5SDimitry Andric //
24280b57cec5SDimitry Andric // Note: initial implementation didn't take into account the possibility
24290b57cec5SDimitry Andric // to specify omp_orig for initializer of the UDR (user defined reduction).
24300b57cec5SDimitry Andric // Corrected implementation takes into account the omp_orig object.
24310b57cec5SDimitry Andric // Compiler is free to use old implementation if omp_orig is not specified.
24320b57cec5SDimitry Andric 
24330b57cec5SDimitry Andric /*!
24340b57cec5SDimitry Andric @ingroup BASIC_TYPES
24350b57cec5SDimitry Andric @{
24360b57cec5SDimitry Andric */
24370b57cec5SDimitry Andric 
24380b57cec5SDimitry Andric /*!
24390b57cec5SDimitry Andric Flags for special info per task reduction item.
24400b57cec5SDimitry Andric */
typedef struct kmp_taskred_flags {
  /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
  unsigned lazy_priv : 1;
  unsigned reserved31 : 31; /**< unused bits, reserved for future flags */
} kmp_taskred_flags_t;
24460b57cec5SDimitry Andric 
24470b57cec5SDimitry Andric /*!
24480b57cec5SDimitry Andric Internal struct for reduction data item related info set up by compiler.
24490b57cec5SDimitry Andric */
typedef struct kmp_task_red_input {
  void *reduce_shar; /**< shared between tasks item to reduce into */
  size_t reduce_size; /**< size of data item in bytes */
  // three compiler-generated routines (init, fini are optional):
  // Note: this is the "old" descriptor layout -- it has no reduce_orig
  // field, so the initializer takes only a pointer to the private copy.
  void *reduce_init; /**< data initialization routine (single parameter) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
} kmp_task_red_input_t;
24590b57cec5SDimitry Andric 
24600b57cec5SDimitry Andric /*!
24610b57cec5SDimitry Andric Internal struct for reduction data item related info saved by the library.
24620b57cec5SDimitry Andric */
typedef struct kmp_taskred_data {
  void *reduce_shar; /**< shared between tasks item to reduce into */
  size_t reduce_size; /**< size of data item (rounded up to cache line) */
  kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
  /* reduce_priv layout depends on flags.lazy_priv: eager mode stores one
     contiguous block of per-thread copies; lazy mode stores an array of
     per-thread pointers that are filled in on first use. */
  void *reduce_priv; /**< array of thread specific items */
  void *reduce_pend; /**< end of private data for faster comparison op */
  // three compiler-generated routines (init, fini are optional):
  void *reduce_comb; /**< data combiner routine */
  void *reduce_init; /**< data initialization routine (two parameters) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_orig; /**< original item (can be used in UDR initializer) */
} kmp_taskred_data_t;
24750b57cec5SDimitry Andric 
24760b57cec5SDimitry Andric /*!
24770b57cec5SDimitry Andric Internal struct for reduction data item related info set up by compiler.
24780b57cec5SDimitry Andric 
24790b57cec5SDimitry Andric New interface: added reduce_orig field to provide omp_orig for UDR initializer.
24800b57cec5SDimitry Andric */
typedef struct kmp_taskred_input {
  void *reduce_shar; /**< shared between tasks item to reduce into */
  void *reduce_orig; /**< original reduction item used for initialization */
  size_t reduce_size; /**< size of data item */
  // three compiler-generated routines (init, fini are optional):
  // Note: the presence of reduce_orig distinguishes this "new" layout from
  // kmp_task_red_input_t; the initializer here takes (priv, orig).
  void *reduce_init; /**< data initialization routine (two parameters) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
} kmp_taskred_input_t;
24910b57cec5SDimitry Andric /*!
24920b57cec5SDimitry Andric @}
24930b57cec5SDimitry Andric */
24940b57cec5SDimitry Andric 
24950b57cec5SDimitry Andric template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src);
24960b57cec5SDimitry Andric template <>
24970b57cec5SDimitry Andric void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item,
24980b57cec5SDimitry Andric                                              kmp_task_red_input_t &src) {
24990b57cec5SDimitry Andric   item.reduce_orig = NULL;
25000b57cec5SDimitry Andric }
25010b57cec5SDimitry Andric template <>
25020b57cec5SDimitry Andric void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item,
25030b57cec5SDimitry Andric                                             kmp_taskred_input_t &src) {
25040b57cec5SDimitry Andric   if (src.reduce_orig != NULL) {
25050b57cec5SDimitry Andric     item.reduce_orig = src.reduce_orig;
25060b57cec5SDimitry Andric   } else {
25070b57cec5SDimitry Andric     item.reduce_orig = src.reduce_shar;
25080b57cec5SDimitry Andric   } // non-NULL reduce_orig means new interface used
25090b57cec5SDimitry Andric }
25100b57cec5SDimitry Andric 
2511e8d8bef9SDimitry Andric template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j);
25120b57cec5SDimitry Andric template <>
25130b57cec5SDimitry Andric void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item,
2514e8d8bef9SDimitry Andric                                            size_t offset) {
25150b57cec5SDimitry Andric   ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset);
25160b57cec5SDimitry Andric }
25170b57cec5SDimitry Andric template <>
25180b57cec5SDimitry Andric void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item,
2519e8d8bef9SDimitry Andric                                           size_t offset) {
25200b57cec5SDimitry Andric   ((void (*)(void *, void *))item.reduce_init)(
25210b57cec5SDimitry Andric       (char *)(item.reduce_priv) + offset, item.reduce_orig);
25220b57cec5SDimitry Andric }
25230b57cec5SDimitry Andric 
25240b57cec5SDimitry Andric template <typename T>
25250b57cec5SDimitry Andric void *__kmp_task_reduction_init(int gtid, int num, T *data) {
2526e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
25270b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
25280b57cec5SDimitry Andric   kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2529e8d8bef9SDimitry Andric   kmp_uint32 nth = thread->th.th_team_nproc;
25300b57cec5SDimitry Andric   kmp_taskred_data_t *arr;
25310b57cec5SDimitry Andric 
25320b57cec5SDimitry Andric   // check input data just in case
25330b57cec5SDimitry Andric   KMP_ASSERT(tg != NULL);
25340b57cec5SDimitry Andric   KMP_ASSERT(data != NULL);
25350b57cec5SDimitry Andric   KMP_ASSERT(num > 0);
25365f757f3fSDimitry Andric   if (nth == 1 && !__kmp_enable_hidden_helper) {
25370b57cec5SDimitry Andric     KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
25380b57cec5SDimitry Andric                   gtid, tg));
25390b57cec5SDimitry Andric     return (void *)tg;
25400b57cec5SDimitry Andric   }
25410b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
25420b57cec5SDimitry Andric                 gtid, tg, num));
25430b57cec5SDimitry Andric   arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
25440b57cec5SDimitry Andric       thread, num * sizeof(kmp_taskred_data_t));
25450b57cec5SDimitry Andric   for (int i = 0; i < num; ++i) {
25460b57cec5SDimitry Andric     size_t size = data[i].reduce_size - 1;
25470b57cec5SDimitry Andric     // round the size up to cache line per thread-specific item
25480b57cec5SDimitry Andric     size += CACHE_LINE - size % CACHE_LINE;
25490b57cec5SDimitry Andric     KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
25500b57cec5SDimitry Andric     arr[i].reduce_shar = data[i].reduce_shar;
25510b57cec5SDimitry Andric     arr[i].reduce_size = size;
25520b57cec5SDimitry Andric     arr[i].flags = data[i].flags;
25530b57cec5SDimitry Andric     arr[i].reduce_comb = data[i].reduce_comb;
25540b57cec5SDimitry Andric     arr[i].reduce_init = data[i].reduce_init;
25550b57cec5SDimitry Andric     arr[i].reduce_fini = data[i].reduce_fini;
25560b57cec5SDimitry Andric     __kmp_assign_orig<T>(arr[i], data[i]);
25570b57cec5SDimitry Andric     if (!arr[i].flags.lazy_priv) {
25580b57cec5SDimitry Andric       // allocate cache-line aligned block and fill it with zeros
25590b57cec5SDimitry Andric       arr[i].reduce_priv = __kmp_allocate(nth * size);
25600b57cec5SDimitry Andric       arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
25610b57cec5SDimitry Andric       if (arr[i].reduce_init != NULL) {
25620b57cec5SDimitry Andric         // initialize all thread-specific items
2563e8d8bef9SDimitry Andric         for (size_t j = 0; j < nth; ++j) {
25640b57cec5SDimitry Andric           __kmp_call_init<T>(arr[i], j * size);
25650b57cec5SDimitry Andric         }
25660b57cec5SDimitry Andric       }
25670b57cec5SDimitry Andric     } else {
25680b57cec5SDimitry Andric       // only allocate space for pointers now,
25690b57cec5SDimitry Andric       // objects will be lazily allocated/initialized if/when requested
25700b57cec5SDimitry Andric       // note that __kmp_allocate zeroes the allocated memory
25710b57cec5SDimitry Andric       arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
25720b57cec5SDimitry Andric     }
25730b57cec5SDimitry Andric   }
25740b57cec5SDimitry Andric   tg->reduce_data = (void *)arr;
25750b57cec5SDimitry Andric   tg->reduce_num_data = num;
25760b57cec5SDimitry Andric   return (void *)tg;
25770b57cec5SDimitry Andric }
25780b57cec5SDimitry Andric 
25790b57cec5SDimitry Andric /*!
25800b57cec5SDimitry Andric @ingroup TASKING
25810b57cec5SDimitry Andric @param gtid      Global thread ID
25820b57cec5SDimitry Andric @param num       Number of data items to reduce
25830b57cec5SDimitry Andric @param data      Array of data for reduction
25840b57cec5SDimitry Andric @return The taskgroup identifier
25850b57cec5SDimitry Andric 
25860b57cec5SDimitry Andric Initialize task reduction for the taskgroup.
25870b57cec5SDimitry Andric 
25880b57cec5SDimitry Andric Note: this entry supposes the optional compiler-generated initializer routine
25890b57cec5SDimitry Andric has single parameter - pointer to object to be initialized. That means
25900b57cec5SDimitry Andric the reduction either does not use omp_orig object, or the omp_orig is accessible
25910b57cec5SDimitry Andric without help of the runtime library.
25920b57cec5SDimitry Andric */
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
  // If a task graph (TDG) is being recorded, save a private copy of the
  // reduction descriptors so the reduction can be re-created when the
  // recorded TDG is replayed.
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
    this_tdg->rec_num_taskred = num;
    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
               sizeof(kmp_task_red_input_t) * num);
  }
#endif
  // Old-style descriptors: single-parameter initializer (see doc above).
  return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
}
26070b57cec5SDimitry Andric 
26080b57cec5SDimitry Andric /*!
26090b57cec5SDimitry Andric @ingroup TASKING
26100b57cec5SDimitry Andric @param gtid      Global thread ID
26110b57cec5SDimitry Andric @param num       Number of data items to reduce
26120b57cec5SDimitry Andric @param data      Array of data for reduction
26130b57cec5SDimitry Andric @return The taskgroup identifier
26140b57cec5SDimitry Andric 
26150b57cec5SDimitry Andric Initialize task reduction for the taskgroup.
26160b57cec5SDimitry Andric 
26170b57cec5SDimitry Andric Note: this entry supposes the optional compiler-generated initializer routine
26180b57cec5SDimitry Andric has two parameters, pointer to object to be initialized and pointer to omp_orig
26190b57cec5SDimitry Andric */
void *__kmpc_taskred_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
  // If a task graph (TDG) is being recorded, save a private copy of the
  // reduction descriptors so the reduction can be re-created when the
  // recorded TDG is replayed.
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
    this_tdg->rec_num_taskred = num;
    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
               sizeof(kmp_task_red_input_t) * num);
  }
#endif
  // New-style descriptors: two-parameter initializer with omp_orig support.
  return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
}
26340b57cec5SDimitry Andric 
26350b57cec5SDimitry Andric // Copy task reduction data (except for shared pointers).
26360b57cec5SDimitry Andric template <typename T>
26370b57cec5SDimitry Andric void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
26380b57cec5SDimitry Andric                                     kmp_taskgroup_t *tg, void *reduce_data) {
26390b57cec5SDimitry Andric   kmp_taskred_data_t *arr;
26400b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
26410b57cec5SDimitry Andric                 " from data %p\n",
26420b57cec5SDimitry Andric                 thr, tg, reduce_data));
26430b57cec5SDimitry Andric   arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
26440b57cec5SDimitry Andric       thr, num * sizeof(kmp_taskred_data_t));
26450b57cec5SDimitry Andric   // threads will share private copies, thunk routines, sizes, flags, etc.:
26460b57cec5SDimitry Andric   KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
26470b57cec5SDimitry Andric   for (int i = 0; i < num; ++i) {
26480b57cec5SDimitry Andric     arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers
26490b57cec5SDimitry Andric   }
26500b57cec5SDimitry Andric   tg->reduce_data = (void *)arr;
26510b57cec5SDimitry Andric   tg->reduce_num_data = num;
26520b57cec5SDimitry Andric }
26530b57cec5SDimitry Andric 
26540b57cec5SDimitry Andric /*!
26550b57cec5SDimitry Andric @ingroup TASKING
26560b57cec5SDimitry Andric @param gtid    Global thread ID
26570b57cec5SDimitry Andric @param tskgrp  The taskgroup ID (optional)
26580b57cec5SDimitry Andric @param data    Shared location of the item
26590b57cec5SDimitry Andric @return The pointer to per-thread data
26600b57cec5SDimitry Andric 
26610b57cec5SDimitry Andric Get thread-specific location of data item
26620b57cec5SDimitry Andric */
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1)
    return data; // nothing to do

  // Start the search at the given taskgroup, or the current task's
  // taskgroup when the caller did not specify one.
  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_taskred_data_t *arr;
  kmp_int32 num;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

#if OMPX_TASKGRAPH
  // When replaying a recorded task graph, use the current task's taskgroup
  // reduction data directly (it must already exist from the recording).
  if ((thread->th.th_current_task->is_taskgraph) &&
      (!__kmp_tdg_is_recording(
          __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
    tg = thread->th.th_current_task->td_taskgroup;
    KMP_ASSERT(tg != NULL);
    KMP_ASSERT(tg->reduce_data != NULL);
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
#endif

  KMP_ASSERT(data != NULL);
  // Walk up the taskgroup chain until an item matching 'data' is found.
  while (tg != NULL) {
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        // Eager mode: 'data' may be the shared item or any address inside
        // the contiguous block of private copies.
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // check shared location first
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // check if we get some thread specific location as parameter
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate thread specific object lazily
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    // Item not in this taskgroup; an enclosing taskgroup must exist.
    KMP_ASSERT(tg->parent);
    tg = tg->parent;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR, this line never executed
}
27310b57cec5SDimitry Andric 
27320b57cec5SDimitry Andric // Finalize task reduction.
27330b57cec5SDimitry Andric // Called from __kmpc_end_taskgroup()
27340b57cec5SDimitry Andric static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
27350b57cec5SDimitry Andric   kmp_int32 nth = th->th.th_team_nproc;
27365f757f3fSDimitry Andric   KMP_DEBUG_ASSERT(
27375f757f3fSDimitry Andric       nth > 1 ||
27385f757f3fSDimitry Andric       __kmp_enable_hidden_helper); // should not be called if nth == 1 unless we
27395f757f3fSDimitry Andric                                    // are using hidden helper threads
27400b57cec5SDimitry Andric   kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
27410b57cec5SDimitry Andric   kmp_int32 num = tg->reduce_num_data;
27420b57cec5SDimitry Andric   for (int i = 0; i < num; ++i) {
27430b57cec5SDimitry Andric     void *sh_data = arr[i].reduce_shar;
27440b57cec5SDimitry Andric     void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
27450b57cec5SDimitry Andric     void (*f_comb)(void *, void *) =
27460b57cec5SDimitry Andric         (void (*)(void *, void *))(arr[i].reduce_comb);
27470b57cec5SDimitry Andric     if (!arr[i].flags.lazy_priv) {
27480b57cec5SDimitry Andric       void *pr_data = arr[i].reduce_priv;
27490b57cec5SDimitry Andric       size_t size = arr[i].reduce_size;
27500b57cec5SDimitry Andric       for (int j = 0; j < nth; ++j) {
27510b57cec5SDimitry Andric         void *priv_data = (char *)pr_data + j * size;
27520b57cec5SDimitry Andric         f_comb(sh_data, priv_data); // combine results
27530b57cec5SDimitry Andric         if (f_fini)
27540b57cec5SDimitry Andric           f_fini(priv_data); // finalize if needed
27550b57cec5SDimitry Andric       }
27560b57cec5SDimitry Andric     } else {
27570b57cec5SDimitry Andric       void **pr_data = (void **)(arr[i].reduce_priv);
27580b57cec5SDimitry Andric       for (int j = 0; j < nth; ++j) {
27590b57cec5SDimitry Andric         if (pr_data[j] != NULL) {
27600b57cec5SDimitry Andric           f_comb(sh_data, pr_data[j]); // combine results
27610b57cec5SDimitry Andric           if (f_fini)
27620b57cec5SDimitry Andric             f_fini(pr_data[j]); // finalize if needed
27630b57cec5SDimitry Andric           __kmp_free(pr_data[j]);
27640b57cec5SDimitry Andric         }
27650b57cec5SDimitry Andric       }
27660b57cec5SDimitry Andric     }
27670b57cec5SDimitry Andric     __kmp_free(arr[i].reduce_priv);
27680b57cec5SDimitry Andric   }
27690b57cec5SDimitry Andric   __kmp_thread_free(th, arr);
27700b57cec5SDimitry Andric   tg->reduce_data = NULL;
27710b57cec5SDimitry Andric   tg->reduce_num_data = 0;
27720b57cec5SDimitry Andric }
27730b57cec5SDimitry Andric 
27740b57cec5SDimitry Andric // Cleanup task reduction data for parallel or worksharing,
27750b57cec5SDimitry Andric // do not touch task private data other threads still working with.
27760b57cec5SDimitry Andric // Called from __kmpc_end_taskgroup()
27770b57cec5SDimitry Andric static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
27780b57cec5SDimitry Andric   __kmp_thread_free(th, tg->reduce_data);
27790b57cec5SDimitry Andric   tg->reduce_data = NULL;
27800b57cec5SDimitry Andric   tg->reduce_num_data = 0;
27810b57cec5SDimitry Andric }
27820b57cec5SDimitry Andric 
// Set up task reduction for a parallel/worksharing region (reduction
// modifier). Exactly one thread initializes the common reduction data via a
// compare-and-swap; the others spin until the data is published, then copy
// it into their own taskgroup. Returns the taskgroup pointer used as the
// reduction identifier.
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }
  kmp_team_t *team = thr->th.th_team;
  void *reduce_data;
  kmp_taskgroup_t *tg;
  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  // (void *)1 is a sentinel meaning "initialization in progress"; the CAS
  // elects exactly one thread to build the shared descriptor copy.
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    // first initialize own data, then make a copy other threads can use
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    // fini counters should be 0 at this point
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    // Release store publishes the fully-built descriptor copy to spinners.
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    while (
        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
        (void *)1) { // wait for task reduction initialization
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
28250b57cec5SDimitry Andric 
28260b57cec5SDimitry Andric /*!
28270b57cec5SDimitry Andric @ingroup TASKING
28280b57cec5SDimitry Andric @param loc       Source location info
28290b57cec5SDimitry Andric @param gtid      Global thread ID
28300b57cec5SDimitry Andric @param is_ws     Is 1 if the reduction is for worksharing, 0 otherwise
28310b57cec5SDimitry Andric @param num       Number of data items to reduce
28320b57cec5SDimitry Andric @param data      Array of data for reduction
28330b57cec5SDimitry Andric @return The taskgroup identifier
28340b57cec5SDimitry Andric 
28350b57cec5SDimitry Andric Initialize task reduction for a parallel or worksharing.
28360b57cec5SDimitry Andric 
28370b57cec5SDimitry Andric Note: this entry supposes the optional compiler-generated initializer routine
28380b57cec5SDimitry Andric has single parameter - pointer to object to be initialized. That means
28390b57cec5SDimitry Andric the reduction either does not use omp_orig object, or the omp_orig is accessible
28400b57cec5SDimitry Andric without help of the runtime library.
28410b57cec5SDimitry Andric */
28420b57cec5SDimitry Andric void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
28430b57cec5SDimitry Andric                                           int num, void *data) {
28440b57cec5SDimitry Andric   return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
28450b57cec5SDimitry Andric                                             (kmp_task_red_input_t *)data);
28460b57cec5SDimitry Andric }
28470b57cec5SDimitry Andric 
28480b57cec5SDimitry Andric /*!
28490b57cec5SDimitry Andric @ingroup TASKING
28500b57cec5SDimitry Andric @param loc       Source location info
28510b57cec5SDimitry Andric @param gtid      Global thread ID
28520b57cec5SDimitry Andric @param is_ws     Is 1 if the reduction is for worksharing, 0 otherwise
28530b57cec5SDimitry Andric @param num       Number of data items to reduce
28540b57cec5SDimitry Andric @param data      Array of data for reduction
28550b57cec5SDimitry Andric @return The taskgroup identifier
28560b57cec5SDimitry Andric 
28570b57cec5SDimitry Andric Initialize task reduction for a parallel or worksharing.
28580b57cec5SDimitry Andric 
28590b57cec5SDimitry Andric Note: this entry supposes the optional compiler-generated initializer routine
28600b57cec5SDimitry Andric has two parameters, pointer to object to be initialized and pointer to omp_orig
28610b57cec5SDimitry Andric */
28620b57cec5SDimitry Andric void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
28630b57cec5SDimitry Andric                                    void *data) {
28640b57cec5SDimitry Andric   return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
28650b57cec5SDimitry Andric                                             (kmp_taskred_input_t *)data);
28660b57cec5SDimitry Andric }
28670b57cec5SDimitry Andric 
28680b57cec5SDimitry Andric /*!
28690b57cec5SDimitry Andric @ingroup TASKING
28700b57cec5SDimitry Andric @param loc       Source location info
28710b57cec5SDimitry Andric @param gtid      Global thread ID
28720b57cec5SDimitry Andric @param is_ws     Is 1 if the reduction is for worksharing, 0 otherwise
28730b57cec5SDimitry Andric 
28740b57cec5SDimitry Andric Finalize task reduction for a parallel or worksharing.
28750b57cec5SDimitry Andric */
void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  // The reduction-modifier scope is implemented on top of an implicit
  // taskgroup (see __kmp_task_reduction_modifier_init), so finalization is
  // simply ending that taskgroup.  is_ws is unused here: the taskgroup
  // teardown path does not distinguish parallel vs worksharing at this point.
  __kmpc_end_taskgroup(loc, gtid);
}
28790b57cec5SDimitry Andric 
28800b57cec5SDimitry Andric // __kmpc_taskgroup: Start a new taskgroup
28810b57cec5SDimitry Andric void __kmpc_taskgroup(ident_t *loc, int gtid) {
2882e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
28830b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
28840b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = thread->th.th_current_task;
28850b57cec5SDimitry Andric   kmp_taskgroup_t *tg_new =
28860b57cec5SDimitry Andric       (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
28870b57cec5SDimitry Andric   KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
28880b57cec5SDimitry Andric   KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
28890b57cec5SDimitry Andric   KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
28900b57cec5SDimitry Andric   tg_new->parent = taskdata->td_taskgroup;
28910b57cec5SDimitry Andric   tg_new->reduce_data = NULL;
28920b57cec5SDimitry Andric   tg_new->reduce_num_data = 0;
2893fe6060f1SDimitry Andric   tg_new->gomp_data = NULL;
28940b57cec5SDimitry Andric   taskdata->td_taskgroup = tg_new;
28950b57cec5SDimitry Andric 
28960b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
28970b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
28980b57cec5SDimitry Andric     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
28990b57cec5SDimitry Andric     if (!codeptr)
29000b57cec5SDimitry Andric       codeptr = OMPT_GET_RETURN_ADDRESS(0);
29010b57cec5SDimitry Andric     kmp_team_t *team = thread->th.th_team;
29020b57cec5SDimitry Andric     ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
29030b57cec5SDimitry Andric     // FIXME: I think this is wrong for lwt!
29040b57cec5SDimitry Andric     ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
29050b57cec5SDimitry Andric 
29060b57cec5SDimitry Andric     ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
29070b57cec5SDimitry Andric         ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
29080b57cec5SDimitry Andric         &(my_task_data), codeptr);
29090b57cec5SDimitry Andric   }
29100b57cec5SDimitry Andric #endif
29110b57cec5SDimitry Andric }
29120b57cec5SDimitry Andric 
// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
//                       and its descendants are complete
//
// loc  - source location of the end of the taskgroup construct
// gtid - global thread id of the encountering thread
//
// Pops the innermost taskgroup: executes other tasks until the taskgroup's
// outstanding-task count drains to zero, finalizes any task reduction bound
// to the taskgroup, then restores the parent taskgroup and frees the
// descriptor.  OMPT sync-region begin/end events are emitted around the wait.
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Snapshot OMPT data up front so the paired sync_region callbacks below
  // report a consistent task/parallel data and code pointer.
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    // FIXME: I think this is wrong for lwt!
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // mark task as waiting not on a barrier
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc;
    taskdata->td_taskwait_thread = gtid + 1;
#if USE_ITT_BUILD
    // For ITT the taskgroup wait is similar to taskwait until we need to
    // distinguish them
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif /* USE_ITT_NOTIFY */
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    // Only wait (executing other tasks meanwhile) if someone besides this
    // thread can still contribute to the count: a non-serialized team, or
    // proxy / hidden-helper tasks completing asynchronously.
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
          thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
      // Acquire-load pairs with the release decrement done when a child
      // task of this taskgroup finishes.
      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants
#endif /* USE_ITT_BUILD */
  }
  KMP_DEBUG_ASSERT(taskgroup->count == 0);

  // Finalize task reduction, if any was registered on this taskgroup.
  // (GOMP-style reductions are torn down elsewhere, hence !gomp_data.)
  if (taskgroup->reduce_data != NULL &&
      !taskgroup->gomp_data) { // need to reduce?
    int cnt;
    void *reduce_data;
    kmp_team_t *t = thread->th.th_team;
    kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
    // check if <priv> data of the first reduction variable shared for the team
    void *priv0 = arr[0].reduce_priv;
    // Slot [0] of t_tg_reduce_data holds team-shared reduction data for a
    // parallel-scoped modifier; slot [1] for a worksharing-scoped one.
    // Matching reduce_priv identifies this taskgroup as that shared one.
    if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
        ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on parallel
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
      if (cnt == thread->th.th_team_nproc - 1) {
        // we are the last thread passing __kmpc_reduction_modifier_fini()
        // finalize task reduction:
        __kmp_task_reduction_fini(thread, taskgroup);
        // cleanup fields in the team structure:
        // TODO: is relaxed store enough here (whole barrier should follow)?
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
      } else {
        // we are not the last thread passing __kmpc_reduction_modifier_fini(),
        // so do not finalize reduction, just clean own copy of the data
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
                   NULL &&
               ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on worksharing
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
      if (cnt == thread->th.th_team_nproc - 1) {
        // we are the last thread passing __kmpc_reduction_modifier_fini()
        __kmp_task_reduction_fini(thread, taskgroup);
        // cleanup fields in team structure:
        // TODO: is relaxed store enough here (whole barrier should follow)?
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
      } else {
        // we are not the last thread passing __kmpc_reduction_modifier_fini(),
        // so do not finalize reduction, just clean own copy of the data
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else {
      // finishing task reduction on taskgroup
      __kmp_task_reduction_fini(thread, taskgroup);
    }
  }
  // Restore parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
30570b57cec5SDimitry Andric 
// __kmp_get_priority_task: try to obtain one task from the task team's
// priority-task lists.
//
// gtid           - global thread id of the calling thread
// task_team      - task team to take a task from
// is_constrained - nonzero if the task scheduling constraint must be honored
//
// Returns the task on success, NULL otherwise.  A "ticket" is reserved first
// by atomically decrementing tt_num_task_pri; if no allowed task is found
// afterwards, the counter is restored before returning NULL.
static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
                                           kmp_task_team_t *task_team,
                                           kmp_int32 is_constrained) {
  kmp_task_t *task = NULL;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *thread_data;
  int ntasks = task_team->tt.tt_num_task_pri;
  if (ntasks == 0) {
    KA_TRACE(
        20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
    return NULL;
  }
  // CAS loop: retry with a freshly loaded value until the decrement wins or
  // the counter drains to zero under contention.
  do {
    // decrement num_tasks to "reserve" one task to get for execution
    if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
                                   ntasks - 1))
      break;
    ntasks = task_team->tt.tt_num_task_pri;
  } while (ntasks > 0);
  if (ntasks == 0) {
    KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
                  __kmp_get_gtid()));
    return NULL;
  }
  // We got a "ticket" to get a "reserved" priority task
  // Walk the priority list until a non-empty deque is found.  Because a task
  // was reserved above, some deque must be non-empty (the KMP_ASSERT guards
  // against walking off the end of the list).  NOTE: on loop exit the deque
  // lock of the chosen list node is still HELD.
  int deque_ntasks;
  kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
  do {
    KMP_ASSERT(list != NULL);
    thread_data = &list->td;
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    deque_ntasks = thread_data->td.td_deque_ntasks;
    if (deque_ntasks == 0) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
                    __kmp_get_gtid(), thread_data));
      list = list->next;
    }
  } while (deque_ntasks == 0);
  KMP_DEBUG_ASSERT(deque_ntasks);
  int target = thread_data->td.td_deque_head;
  current = __kmp_threads[gtid]->th.th_current_task;
  taskdata = thread_data->td.td_deque[target];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and Wrap.
    thread_data->td.td_deque_head =
        (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  } else {
    // Head task is blocked by the task scheduling constraint.  Scanning the
    // rest of the deque is only worthwhile when untied tasks exist (tied
    // tasks deeper in the deque cannot satisfy the TSC either).
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow to steal victim task
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
                    "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, thread_data, task_team, deque_ntasks, target,
                    thread_data->td.td_deque_tail));
      task_team->tt.tt_num_task_pri++; // atomic inc, restore value
      return NULL;
    }
    int i;
    // walk through the deque trying to steal any task
    taskdata = NULL;
    for (i = 1; i < deque_ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      taskdata = thread_data->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found task to execute
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate found to execute
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(
          10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
               "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
               gtid, thread_data, task_team, deque_ntasks,
               thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
      task_team->tt.tt_num_task_pri++; // atomic inc, restore value
      return NULL;
    }
    // Close the hole left by removing a middle element: shift the elements
    // after it toward the head and pull the tail back by one.
    int prev = target;
    for (i = i + 1; i < deque_ntasks; ++i) {
      // shift remaining tasks in the deque left by 1
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        thread_data->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
    thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped))
  }
  thread_data->td.td_deque_ntasks = deque_ntasks - 1;
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
315781ad6265SDimitry Andric 
// __kmp_remove_my_task: remove a task from my own deque
//
// thread         - calling thread's descriptor
// gtid           - global thread id of the calling thread
// task_team      - task team owning the deques
// is_constrained - nonzero if the task scheduling constraint must be honored
//
// Pops from the TAIL of the calling thread's own deque (LIFO order; stealers
// take from the head).  Returns the task, or NULL if the deque is empty or
// the tail task is disallowed by the task scheduling constraint.
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // Caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  // Unsynchronized fast-path check: avoids taking the lock when the deque
  // looks empty.  Re-checked below under the lock.
  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  // Re-check under the lock: a stealer may have drained the deque between
  // the fast-path check and lock acquisition.
  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // Wrap index.
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The TSC does not allow to steal victim task
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  // Commit the pop: retract the tail, then decrement the count.
  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
32270b57cec5SDimitry Andric 
32280b57cec5SDimitry Andric // __kmp_steal_task: remove a task from another thread's deque
32290b57cec5SDimitry Andric // Assume that calling thread has already checked existence of
32300b57cec5SDimitry Andric // task_team thread_data before calling this routine.
3231*0fca6ea1SDimitry Andric static kmp_task_t *__kmp_steal_task(kmp_int32 victim_tid, kmp_int32 gtid,
32320b57cec5SDimitry Andric                                     kmp_task_team_t *task_team,
32330b57cec5SDimitry Andric                                     std::atomic<kmp_int32> *unfinished_threads,
32340b57cec5SDimitry Andric                                     int *thread_finished,
32350b57cec5SDimitry Andric                                     kmp_int32 is_constrained) {
32360b57cec5SDimitry Andric   kmp_task_t *task;
32370b57cec5SDimitry Andric   kmp_taskdata_t *taskdata;
32380b57cec5SDimitry Andric   kmp_taskdata_t *current;
32390b57cec5SDimitry Andric   kmp_thread_data_t *victim_td, *threads_data;
32400b57cec5SDimitry Andric   kmp_int32 target;
3241*0fca6ea1SDimitry Andric   kmp_info_t *victim_thr;
32420b57cec5SDimitry Andric 
32430b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
32440b57cec5SDimitry Andric 
32450b57cec5SDimitry Andric   threads_data = task_team->tt.tt_threads_data;
32460b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
3247*0fca6ea1SDimitry Andric   KMP_DEBUG_ASSERT(victim_tid >= 0);
3248*0fca6ea1SDimitry Andric   KMP_DEBUG_ASSERT(victim_tid < task_team->tt.tt_nproc);
32490b57cec5SDimitry Andric 
32500b57cec5SDimitry Andric   victim_td = &threads_data[victim_tid];
3251*0fca6ea1SDimitry Andric   victim_thr = victim_td->td.td_thr;
3252*0fca6ea1SDimitry Andric   (void)victim_thr; // Use in TRACE messages which aren't always enabled.
32530b57cec5SDimitry Andric 
32540b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
32550b57cec5SDimitry Andric                 "task_team=%p ntasks=%d head=%u tail=%u\n",
32560b57cec5SDimitry Andric                 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
32570b57cec5SDimitry Andric                 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
32580b57cec5SDimitry Andric                 victim_td->td.td_deque_tail));
32590b57cec5SDimitry Andric 
32600b57cec5SDimitry Andric   if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
32610b57cec5SDimitry Andric     KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
32620b57cec5SDimitry Andric                   "task_team=%p ntasks=%d head=%u tail=%u\n",
32630b57cec5SDimitry Andric                   gtid, __kmp_gtid_from_thread(victim_thr), task_team,
32640b57cec5SDimitry Andric                   victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
32650b57cec5SDimitry Andric                   victim_td->td.td_deque_tail));
32660b57cec5SDimitry Andric     return NULL;
32670b57cec5SDimitry Andric   }
32680b57cec5SDimitry Andric 
32690b57cec5SDimitry Andric   __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
32700b57cec5SDimitry Andric 
32710b57cec5SDimitry Andric   int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
32720b57cec5SDimitry Andric   // Check again after we acquire the lock
32730b57cec5SDimitry Andric   if (ntasks == 0) {
32740b57cec5SDimitry Andric     __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
32750b57cec5SDimitry Andric     KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
32760b57cec5SDimitry Andric                   "task_team=%p ntasks=%d head=%u tail=%u\n",
32770b57cec5SDimitry Andric                   gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
32780b57cec5SDimitry Andric                   victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
32790b57cec5SDimitry Andric     return NULL;
32800b57cec5SDimitry Andric   }
32810b57cec5SDimitry Andric 
32820b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
32830b57cec5SDimitry Andric   current = __kmp_threads[gtid]->th.th_current_task;
32840b57cec5SDimitry Andric   taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
32850b57cec5SDimitry Andric   if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
32860b57cec5SDimitry Andric     // Bump head pointer and Wrap.
32870b57cec5SDimitry Andric     victim_td->td.td_deque_head =
32880b57cec5SDimitry Andric         (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
32890b57cec5SDimitry Andric   } else {
32900b57cec5SDimitry Andric     if (!task_team->tt.tt_untied_task_encountered) {
32910b57cec5SDimitry Andric       // The TSC does not allow to steal victim task
32920b57cec5SDimitry Andric       __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
32930b57cec5SDimitry Andric       KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
32940b57cec5SDimitry Andric                     "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
32950b57cec5SDimitry Andric                     gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
32960b57cec5SDimitry Andric                     victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
32970b57cec5SDimitry Andric       return NULL;
32980b57cec5SDimitry Andric     }
32990b57cec5SDimitry Andric     int i;
33000b57cec5SDimitry Andric     // walk through victim's deque trying to steal any task
33010b57cec5SDimitry Andric     target = victim_td->td.td_deque_head;
33020b57cec5SDimitry Andric     taskdata = NULL;
33030b57cec5SDimitry Andric     for (i = 1; i < ntasks; ++i) {
33040b57cec5SDimitry Andric       target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
33050b57cec5SDimitry Andric       taskdata = victim_td->td.td_deque[target];
33060b57cec5SDimitry Andric       if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
33070b57cec5SDimitry Andric         break; // found victim task
33080b57cec5SDimitry Andric       } else {
33090b57cec5SDimitry Andric         taskdata = NULL;
33100b57cec5SDimitry Andric       }
33110b57cec5SDimitry Andric     }
33120b57cec5SDimitry Andric     if (taskdata == NULL) {
33130b57cec5SDimitry Andric       // No appropriate candidate to steal found
33140b57cec5SDimitry Andric       __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
33150b57cec5SDimitry Andric       KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
33160b57cec5SDimitry Andric                     "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
33170b57cec5SDimitry Andric                     gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
33180b57cec5SDimitry Andric                     victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
33190b57cec5SDimitry Andric       return NULL;
33200b57cec5SDimitry Andric     }
33210b57cec5SDimitry Andric     int prev = target;
33220b57cec5SDimitry Andric     for (i = i + 1; i < ntasks; ++i) {
33230b57cec5SDimitry Andric       // shift remaining tasks in the deque left by 1
33240b57cec5SDimitry Andric       target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
33250b57cec5SDimitry Andric       victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
33260b57cec5SDimitry Andric       prev = target;
33270b57cec5SDimitry Andric     }
33280b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(
33290b57cec5SDimitry Andric         victim_td->td.td_deque_tail ==
33300b57cec5SDimitry Andric         (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
33310b57cec5SDimitry Andric     victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped))
33320b57cec5SDimitry Andric   }
33330b57cec5SDimitry Andric   if (*thread_finished) {
33340b57cec5SDimitry Andric     // We need to un-mark this victim as a finished victim.  This must be done
33350b57cec5SDimitry Andric     // before releasing the lock, or else other threads (starting with the
3336fe6060f1SDimitry Andric     // primary thread victim) might be prematurely released from the barrier!!!
3337349cc55cSDimitry Andric #if KMP_DEBUG
3338349cc55cSDimitry Andric     kmp_int32 count =
3339349cc55cSDimitry Andric #endif
3340349cc55cSDimitry Andric         KMP_ATOMIC_INC(unfinished_threads);
33410b57cec5SDimitry Andric     KA_TRACE(
33420b57cec5SDimitry Andric         20,
33430b57cec5SDimitry Andric         ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
33440b57cec5SDimitry Andric          gtid, count + 1, task_team));
33450b57cec5SDimitry Andric     *thread_finished = FALSE;
33460b57cec5SDimitry Andric   }
33470b57cec5SDimitry Andric   TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
33480b57cec5SDimitry Andric 
33490b57cec5SDimitry Andric   __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
33500b57cec5SDimitry Andric 
33510b57cec5SDimitry Andric   KMP_COUNT_BLOCK(TASK_stolen);
33520b57cec5SDimitry Andric   KA_TRACE(10,
33530b57cec5SDimitry Andric            ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
33540b57cec5SDimitry Andric             "task_team=%p ntasks=%d head=%u tail=%u\n",
33550b57cec5SDimitry Andric             gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
33560b57cec5SDimitry Andric             ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
33570b57cec5SDimitry Andric 
33580b57cec5SDimitry Andric   task = KMP_TASKDATA_TO_TASK(taskdata);
33590b57cec5SDimitry Andric   return task;
33600b57cec5SDimitry Andric }
33610b57cec5SDimitry Andric 
// __kmp_execute_tasks_template: Choose and execute tasks until either the
// condition is satisfied (return true) or there are none left (return false).
//
// Task sources are tried in priority order each iteration:
//   1. the team's priority-task queue, 2. this thread's own deque,
//   3. stealing from another thread's deque.
//
// final_spin is TRUE if this is the spin at the release barrier.
// thread_finished indicates whether the thread is finished executing all
// the tasks it has on its deque, and is at the release barrier.
// spinner is the location on which to spin.
// spinner == NULL means only execute a single task and return.
// checker is the value to check to terminate the spin.
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  std::atomic<kmp_int32> *unfinished_threads;
  // victim_tid sentinel values: -2 = "no victim chosen yet this call",
  // -1 = "no recorded last-stolen victim".
  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  // Nothing to do if tasking was never set up for this thread/region.
  if (task_team == NULL || current_task == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);

  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
  KMP_DEBUG_ASSERT(*unfinished_threads >= 0);

  while (1) { // Outer loop keeps trying to find tasks in case of single thread
    // getting tasks from target constructs
    while (1) { // Inner loop to find a task and execute it
      task = NULL;
      if (task_team->tt.tt_num_task_pri) { // get priority task first
        task = __kmp_get_priority_task(gtid, task_team, is_constrained);
      }
      if (task == NULL && use_own_tasks) { // check own queue next
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // Steal a task finally
        int asleep = 1;
        use_own_tasks = 0;
        // Try to steal from the last place I stole from successfully.
        if (victim_tid == -2) { // haven't stolen anything yet
          victim_tid = threads_data[tid].td.td_deque_last_stolen;
          if (victim_tid !=
              -1) // if we have a last stolen from victim, get the thread
            other_thread = threads_data[victim_tid].td.td_thr;
        }
        if (victim_tid != -1) { // found last victim
          asleep = 0;
        } else if (!new_victim) { // no recent steals and we haven't already
          // used a new victim; select a random thread
          do { // Find a different thread to steal work from.
            // Pick a random thread. Initial plan was to cycle through all the
            // threads, and only return if we tried to steal from every thread,
            // and failed.  Arch says that's not such a great idea.
            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
            if (victim_tid >= tid) {
              ++victim_tid; // Adjusts random distribution to exclude self
            }
            // Found a potential victim
            other_thread = threads_data[victim_tid].td.td_thr;
            // There is a slight chance that __kmp_enable_tasking() did not wake
            // up all threads waiting at the barrier.  If victim is sleeping,
            // then wake it up. Since we were going to pay the cache miss
            // penalty for referencing another thread's kmp_info_t struct
            // anyway,
            // the check shouldn't cost too much performance at this point. In
            // extra barrier mode, tasks do not sleep at the separate tasking
            // barrier, so this isn't a problem.
            asleep = 0;
            if ((__kmp_tasking_mode == tskm_task_teams) &&
                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
                 NULL)) {
              asleep = 1;
              __kmp_null_resume_wrapper(other_thread);
              // A sleeping thread should not have any tasks on it's queue.
              // There is a slight possibility that it resumes, steals a task
              // from another thread, which spawns more tasks, all in the time
              // that it takes this thread to check => don't write an assertion
              // that the victim's queue is empty.  Try stealing from a
              // different thread.
            }
          } while (asleep);
        }

        if (!asleep) {
          // We have a victim to try to steal from
          task =
              __kmp_steal_task(victim_tid, gtid, task_team, unfinished_threads,
                               thread_finished, is_constrained);
        }
        if (task != NULL) { // set last stolen to victim
          if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
            threads_data[tid].td.td_deque_last_stolen = victim_tid;
            // The pre-refactored code did not try more than 1 successful new
            // victim, unless the last one generated more local tasks;
            // new_victim keeps track of this
            new_victim = 1;
          }
        } else { // No tasks found; unset last_stolen
          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
          victim_tid = -2; // no successful victim found
        }
      }

      if (task == NULL)
        break; // break out of tasking loop

// Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
          // get the object reliably
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
      // If this thread is only partway through the barrier and the condition is
      // met, then return now, so that the barrier gather/release pattern can
      // proceed. If this thread is in the last spin loop in the barrier,
      // waiting to be released, we know that the termination condition will not
      // be satisfied, so don't waste any cycles checking it.
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
      // If execution of a stolen task results in more tasks being placed on our
      // run queue, reset use_own_tasks
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

    // The task source has been exhausted. If in final spin loop of barrier,
    // check if termination condition is satisfied. The work queue may be empty
    // but there might be proxy tasks still executing.
    if (final_spin &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
      // First, decrement the #unfinished threads, if that has not already been
      // done.  This decrement might be to the spin location, and result in the
      // termination condition being satisfied.
      if (!*thread_finished) {
#if KMP_DEBUG
        kmp_int32 count = -1 +
#endif
            KMP_ATOMIC_DEC(unfinished_threads);
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }

      // It is now unsafe to reference thread->th.th_team !!!
      // Decrementing task_team->tt.tt_unfinished_threads can allow the primary
      // thread to pass through the barrier, where it might reset each thread's
      // th.th_team field for the next parallel region. If we can steal more
      // work, we know that this has not happened yet.
      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // If this thread's task team is NULL, primary thread has recognized that
    // there are no more tasks; bail out
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

    // Check the flag again to see if it has already done in case to be trapped
    // into infinite loop when a if0 task depends on a hidden helper task
    // outside any parallel region. Detached tasks are not impacted in this case
    // because the only thread executing this function has to execute the proxy
    // task so it is in another code path that has the same check.
    if (flag == NULL || (!final_spin && flag->done_check())) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
                gtid));
      return TRUE;
    }

    // We could be getting tasks from target constructs; if this is the only
    // thread, keep trying to execute tasks from own queue
    if (nthreads == 1 &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
      use_own_tasks = 1;
    else {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
}
35930b57cec5SDimitry Andric 
3594e8d8bef9SDimitry Andric template <bool C, bool S>
35950b57cec5SDimitry Andric int __kmp_execute_tasks_32(
3596e8d8bef9SDimitry Andric     kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
35970b57cec5SDimitry Andric     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
35980b57cec5SDimitry Andric     kmp_int32 is_constrained) {
35990b57cec5SDimitry Andric   return __kmp_execute_tasks_template(
36000b57cec5SDimitry Andric       thread, gtid, flag, final_spin,
36010b57cec5SDimitry Andric       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
36020b57cec5SDimitry Andric }
36030b57cec5SDimitry Andric 
3604e8d8bef9SDimitry Andric template <bool C, bool S>
36050b57cec5SDimitry Andric int __kmp_execute_tasks_64(
3606e8d8bef9SDimitry Andric     kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
36070b57cec5SDimitry Andric     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
36080b57cec5SDimitry Andric     kmp_int32 is_constrained) {
36090b57cec5SDimitry Andric   return __kmp_execute_tasks_template(
36100b57cec5SDimitry Andric       thread, gtid, flag, final_spin,
36110b57cec5SDimitry Andric       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
36120b57cec5SDimitry Andric }
36130b57cec5SDimitry Andric 
3614349cc55cSDimitry Andric template <bool C, bool S>
3615349cc55cSDimitry Andric int __kmp_atomic_execute_tasks_64(
3616349cc55cSDimitry Andric     kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
3617349cc55cSDimitry Andric     int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3618349cc55cSDimitry Andric     kmp_int32 is_constrained) {
3619349cc55cSDimitry Andric   return __kmp_execute_tasks_template(
3620349cc55cSDimitry Andric       thread, gtid, flag, final_spin,
3621349cc55cSDimitry Andric       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3622349cc55cSDimitry Andric }
3623349cc55cSDimitry Andric 
36240b57cec5SDimitry Andric int __kmp_execute_tasks_oncore(
36250b57cec5SDimitry Andric     kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
36260b57cec5SDimitry Andric     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
36270b57cec5SDimitry Andric     kmp_int32 is_constrained) {
36280b57cec5SDimitry Andric   return __kmp_execute_tasks_template(
36290b57cec5SDimitry Andric       thread, gtid, flag, final_spin,
36300b57cec5SDimitry Andric       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
36310b57cec5SDimitry Andric }
36320b57cec5SDimitry Andric 
// Explicit instantiations of the execute_tasks entry points for the
// <C, S> flag-variant combinations used elsewhere in the runtime.
// NOTE(review): the meaning of the C/S template parameters is defined by
// the kmp_flag class templates (kmp_wait_release.h) — confirm there before
// adding new combinations.
template int
__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
                                     kmp_flag_32<false, false> *, int,
                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<false, true> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<true, false> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_atomic_execute_tasks_64<false, true>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_atomic_execute_tasks_64<true, false>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3657349cc55cSDimitry Andric 
36580b57cec5SDimitry Andric // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
36590b57cec5SDimitry Andric // next barrier so they can assist in executing enqueued tasks.
36600b57cec5SDimitry Andric // First thread in allocates the task team atomically.
36610b57cec5SDimitry Andric static void __kmp_enable_tasking(kmp_task_team_t *task_team,
36620b57cec5SDimitry Andric                                  kmp_info_t *this_thr) {
36630b57cec5SDimitry Andric   kmp_thread_data_t *threads_data;
36640b57cec5SDimitry Andric   int nthreads, i, is_init_thread;
36650b57cec5SDimitry Andric 
36660b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
36670b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr)));
36680b57cec5SDimitry Andric 
36690b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_team != NULL);
36700b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
36710b57cec5SDimitry Andric 
36720b57cec5SDimitry Andric   nthreads = task_team->tt.tt_nproc;
36730b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(nthreads > 0);
36740b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
36750b57cec5SDimitry Andric 
36760b57cec5SDimitry Andric   // Allocate or increase the size of threads_data if necessary
36770b57cec5SDimitry Andric   is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
36780b57cec5SDimitry Andric 
36790b57cec5SDimitry Andric   if (!is_init_thread) {
36800b57cec5SDimitry Andric     // Some other thread already set up the array.
36810b57cec5SDimitry Andric     KA_TRACE(
36820b57cec5SDimitry Andric         20,
36830b57cec5SDimitry Andric         ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
36840b57cec5SDimitry Andric          __kmp_gtid_from_thread(this_thr)));
36850b57cec5SDimitry Andric     return;
36860b57cec5SDimitry Andric   }
36870b57cec5SDimitry Andric   threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
36880b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(threads_data != NULL);
36890b57cec5SDimitry Andric 
36900b57cec5SDimitry Andric   if (__kmp_tasking_mode == tskm_task_teams &&
36910b57cec5SDimitry Andric       (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
36920b57cec5SDimitry Andric     // Release any threads sleeping at the barrier, so that they can steal
36930b57cec5SDimitry Andric     // tasks and execute them.  In extra barrier mode, tasks do not sleep
36940b57cec5SDimitry Andric     // at the separate tasking barrier, so this isn't a problem.
36950b57cec5SDimitry Andric     for (i = 0; i < nthreads; i++) {
3696349cc55cSDimitry Andric       void *sleep_loc;
36970b57cec5SDimitry Andric       kmp_info_t *thread = threads_data[i].td.td_thr;
36980b57cec5SDimitry Andric 
36990b57cec5SDimitry Andric       if (i == this_thr->th.th_info.ds.ds_tid) {
37000b57cec5SDimitry Andric         continue;
37010b57cec5SDimitry Andric       }
37020b57cec5SDimitry Andric       // Since we haven't locked the thread's suspend mutex lock at this
37030b57cec5SDimitry Andric       // point, there is a small window where a thread might be putting
37040b57cec5SDimitry Andric       // itself to sleep, but hasn't set the th_sleep_loc field yet.
37050b57cec5SDimitry Andric       // To work around this, __kmp_execute_tasks_template() periodically checks
37060b57cec5SDimitry Andric       // see if other threads are sleeping (using the same random mechanism that
37070b57cec5SDimitry Andric       // is used for task stealing) and awakens them if they are.
37080b57cec5SDimitry Andric       if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
37090b57cec5SDimitry Andric           NULL) {
37100b57cec5SDimitry Andric         KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
37110b57cec5SDimitry Andric                       __kmp_gtid_from_thread(this_thr),
37120b57cec5SDimitry Andric                       __kmp_gtid_from_thread(thread)));
3713349cc55cSDimitry Andric         __kmp_null_resume_wrapper(thread);
37140b57cec5SDimitry Andric       } else {
37150b57cec5SDimitry Andric         KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
37160b57cec5SDimitry Andric                       __kmp_gtid_from_thread(this_thr),
37170b57cec5SDimitry Andric                       __kmp_gtid_from_thread(thread)));
37180b57cec5SDimitry Andric       }
37190b57cec5SDimitry Andric     }
37200b57cec5SDimitry Andric   }
37210b57cec5SDimitry Andric 
37220b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
37230b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr)));
37240b57cec5SDimitry Andric }
37250b57cec5SDimitry Andric 
37260b57cec5SDimitry Andric /* // TODO: Check the comment consistency
37270b57cec5SDimitry Andric  * Utility routines for "task teams".  A task team (kmp_task_t) is kind of
37280b57cec5SDimitry Andric  * like a shadow of the kmp_team_t data struct, with a different lifetime.
37290b57cec5SDimitry Andric  * After a child * thread checks into a barrier and calls __kmp_release() from
37300b57cec5SDimitry Andric  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
37310b57cec5SDimitry Andric  * longer assume that the kmp_team_t structure is intact (at any moment, the
3732fe6060f1SDimitry Andric  * primary thread may exit the barrier code and free the team data structure,
37330b57cec5SDimitry Andric  * and return the threads to the thread pool).
37340b57cec5SDimitry Andric  *
3735480093f4SDimitry Andric  * This does not work with the tasking code, as the thread is still
37360b57cec5SDimitry Andric  * expected to participate in the execution of any tasks that may have been
 * spawned by a member of the team, and the thread still needs access
 * to each thread in the team, so that it can steal work from it.
37390b57cec5SDimitry Andric  *
37400b57cec5SDimitry Andric  * Enter the existence of the kmp_task_team_t struct.  It employs a reference
3741fe6060f1SDimitry Andric  * counting mechanism, and is allocated by the primary thread before calling
 * __kmp_<barrier_kind>_release, and then is released by the last thread to
37430b57cec5SDimitry Andric  * exit __kmp_<barrier_kind>_release at the next barrier.  I.e. the lifetimes
37440b57cec5SDimitry Andric  * of the kmp_task_team_t structs for consecutive barriers can overlap
3745fe6060f1SDimitry Andric  * (and will, unless the primary thread is the last thread to exit the barrier
37460b57cec5SDimitry Andric  * release phase, which is not typical). The existence of such a struct is
37470b57cec5SDimitry Andric  * useful outside the context of tasking.
37480b57cec5SDimitry Andric  *
37490b57cec5SDimitry Andric  * We currently use the existence of the threads array as an indicator that
37500b57cec5SDimitry Andric  * tasks were spawned since the last barrier.  If the structure is to be
37510b57cec5SDimitry Andric  * useful outside the context of tasking, then this will have to change, but
37525ffd83dbSDimitry Andric  * not setting the field minimizes the performance impact of tasking on
37530b57cec5SDimitry Andric  * barriers, when no explicit tasks were spawned (pushed, actually).
37540b57cec5SDimitry Andric  */
37550b57cec5SDimitry Andric 
// Recycled kmp_task_team_t structures are kept on this free list.
// NOTE(review): presumably guarded by __kmp_task_team_lock below — confirm
// at the alloc/free sites, which are outside this view.
static kmp_task_team_t *__kmp_free_task_teams =
    NULL; // Free list for task_team data structures
// Lock for task team data structures
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
37610b57cec5SDimitry Andric 
// __kmp_alloc_task_deque:
// Allocates a task deque for a particular thread, and initialize the necessary
// data structures relating to the deque.  This only happens once per thread
// per task team since task teams are recycled. No lock is needed during
// allocation since each thread allocates its own deque.
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Deque must not already exist; task-team recycling guarantees this.
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none" (-1 is the sentinel)
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for task deque, and zero the deque
  // Cannot use __kmp_thread_calloc() because threads not around for
  // kmp_reap_task_team( ).
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
37900b57cec5SDimitry Andric 
37910b57cec5SDimitry Andric // __kmp_free_task_deque:
37920b57cec5SDimitry Andric // Deallocates a task deque for a particular thread. Happens at library
37930b57cec5SDimitry Andric // deallocation so don't need to reset all thread data fields.
37940b57cec5SDimitry Andric static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
37950b57cec5SDimitry Andric   if (thread_data->td.td_deque != NULL) {
37960b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
37970b57cec5SDimitry Andric     TCW_4(thread_data->td.td_deque_ntasks, 0);
37980b57cec5SDimitry Andric     __kmp_free(thread_data->td.td_deque);
37990b57cec5SDimitry Andric     thread_data->td.td_deque = NULL;
38000b57cec5SDimitry Andric     __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
38010b57cec5SDimitry Andric   }
38020b57cec5SDimitry Andric 
38030b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
38040b57cec5SDimitry Andric   // GEH: Figure out what to do here for td_susp_tied_tasks
38050b57cec5SDimitry Andric   if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
38060b57cec5SDimitry Andric     __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
38070b57cec5SDimitry Andric   }
38080b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
38090b57cec5SDimitry Andric }
38100b57cec5SDimitry Andric 
// __kmp_realloc_task_threads_data:
// Allocates a threads_data array for a task team, either by allocating an
// initial array or enlarging an existing array.  Only the first thread to get
// the lock allocs or enlarges the array and re-initializes the array elements.
// That thread returns "TRUE", the rest return "FALSE".
// Assumes that the new array size is given by task_team -> tt.tt_nproc.
// The current size is given by task_team -> tt.tt_max_threads.
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE; // TRUE only for the thread that performs init

  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc; // requested (new) size
  maxthreads = task_team->tt.tt_max_threads; // currently allocated size

  // All threads must lock when they encounter the first task of the implicit
  // task region to make sure threads_data fields are (re)initialized before
  // used.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  // Re-check under the lock: only the first thread in does the work.
  if (!TCR_4(task_team->tt.tt_found_tasks)) {
    // first thread to enable tasking
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {

      if (*threads_data_p != NULL) {
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Reallocate threads_data to have more elements than current array
        // Cannot use __kmp_thread_realloc() because threads not around for
        // kmp_reap_task_team( ).  Note all new array entries are initialized
        // to zero by __kmp_allocate().
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // copy old data to new data
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // GEH: Figure out if this is the right thing to do
        // NOTE(review): this loop indexes *threads_data_p (still the old,
        // maxthreads-sized array) with i up to nthreads-1 — looks like it
        // should operate on new_data instead; confirm before enabling
        // BUILD_TIED_TASK_STACK.
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
       // Install the new data and free the old data
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Make the initial allocate for threads_data array, and zero entries
        // Cannot use __kmp_thread_calloc() because threads not around for
        // kmp_reap_task_team( ).
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
#ifdef BUILD_TIED_TASK_STACK
        // GEH: Figure out if this is the right thing to do
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // If array has (more than) enough elements, go ahead and use it
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // initialize threads_data pointers back to thread_info structures
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last stolen field survives across teams / barrier, and the number
        // of threads may have changed.  It's possible (likely?) that a new
        // parallel region will exhibit the same behavior as previous region.
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    // Make all the initialization above visible before publishing
    // tt_found_tasks, which other threads read without the lock.
    KMP_MB();
    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
39180b57cec5SDimitry Andric 
39190b57cec5SDimitry Andric // __kmp_free_task_threads_data:
39200b57cec5SDimitry Andric // Deallocates a threads_data array for a task team, including any attached
39210b57cec5SDimitry Andric // tasking deques.  Only occurs at library shutdown.
39220b57cec5SDimitry Andric static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
39230b57cec5SDimitry Andric   __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
39240b57cec5SDimitry Andric   if (task_team->tt.tt_threads_data != NULL) {
39250b57cec5SDimitry Andric     int i;
39260b57cec5SDimitry Andric     for (i = 0; i < task_team->tt.tt_max_threads; i++) {
39270b57cec5SDimitry Andric       __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
39280b57cec5SDimitry Andric     }
39290b57cec5SDimitry Andric     __kmp_free(task_team->tt.tt_threads_data);
39300b57cec5SDimitry Andric     task_team->tt.tt_threads_data = NULL;
39310b57cec5SDimitry Andric   }
39320b57cec5SDimitry Andric   __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
39330b57cec5SDimitry Andric }
39340b57cec5SDimitry Andric 
393581ad6265SDimitry Andric // __kmp_free_task_pri_list:
393681ad6265SDimitry Andric // Deallocates tasking deques used for priority tasks.
393781ad6265SDimitry Andric // Only occurs at library shutdown.
393881ad6265SDimitry Andric static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
393981ad6265SDimitry Andric   __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
394081ad6265SDimitry Andric   if (task_team->tt.tt_task_pri_list != NULL) {
394181ad6265SDimitry Andric     kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
394281ad6265SDimitry Andric     while (list != NULL) {
394381ad6265SDimitry Andric       kmp_task_pri_t *next = list->next;
394481ad6265SDimitry Andric       __kmp_free_task_deque(&list->td);
394581ad6265SDimitry Andric       __kmp_free(list);
394681ad6265SDimitry Andric       list = next;
394781ad6265SDimitry Andric     }
394881ad6265SDimitry Andric     task_team->tt.tt_task_pri_list = NULL;
394981ad6265SDimitry Andric   }
395081ad6265SDimitry Andric   __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
395181ad6265SDimitry Andric }
395281ad6265SDimitry Andric 
// __kmp_task_team_init:
// (Re)initializes a task team's per-region state for the given team. A task
// team that is already active and matches the team size is left untouched.
// Note the store order: tt_active is published last (TCW_4) so that readers
// observing an active task team see consistent counts.
static inline void __kmp_task_team_init(kmp_task_team_t *task_team,
                                        kmp_team_t *team) {
  int team_nth = team->t.t_nproc;
  // Only need to init if task team isn't active or team size changed
  if (!task_team->tt.tt_active || team_nth != task_team->tt.tt_nproc) {
    TCW_4(task_team->tt.tt_found_tasks, FALSE);
    TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
    TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
    TCW_4(task_team->tt.tt_nproc, team_nth);
    KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team_nth);
    TCW_4(task_team->tt.tt_active, TRUE);
  }
}
3966*0fca6ea1SDimitry Andric 
// __kmp_allocate_task_team:
// Allocates a task team associated with a specific team, taking it from
// the global task team free list if possible.  Also initializes data
// structures.
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  // Unlocked peek at the free list first; re-check under the lock before
  // actually popping, since another thread may drain it in between.
  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Take a task team from the task team pool
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) {
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team if one is not available. Cannot use
    // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
    __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
    // suppress race conditions detection on synchronization flags in debug mode
    // this helps to analyze library internals eliminating false positives
    __itt_suppress_mark_range(
        __itt_suppress_range, __itt_suppress_threading_errors,
        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
    __itt_suppress_mark_range(__itt_suppress_range,
                              __itt_suppress_threading_errors,
                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
                              sizeof(task_team->tt.tt_active));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
    // Note: __kmp_allocate zeroes returned memory, otherwise we would need:
    // task_team->tt.tt_threads_data = NULL;
    // task_team->tt.tt_max_threads = 0;
    // task_team->tt.tt_next = NULL;
  }

  // Reset per-region fields (counters, flags) for the team that will use it.
  __kmp_task_team_init(task_team, team);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  return task_team;
}
40230b57cec5SDimitry Andric 
40240b57cec5SDimitry Andric // __kmp_free_task_team:
40250b57cec5SDimitry Andric // Frees the task team associated with a specific thread, and adds it
40260b57cec5SDimitry Andric // to the global task team free list.
40270b57cec5SDimitry Andric void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
40280b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
40290b57cec5SDimitry Andric                 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
40300b57cec5SDimitry Andric 
40310b57cec5SDimitry Andric   // Put task team back on free list
40320b57cec5SDimitry Andric   __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
40330b57cec5SDimitry Andric 
40340b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
40350b57cec5SDimitry Andric   task_team->tt.tt_next = __kmp_free_task_teams;
40360b57cec5SDimitry Andric   TCW_PTR(__kmp_free_task_teams, task_team);
40370b57cec5SDimitry Andric 
40380b57cec5SDimitry Andric   __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
40390b57cec5SDimitry Andric }
40400b57cec5SDimitry Andric 
40410b57cec5SDimitry Andric // __kmp_reap_task_teams:
40420b57cec5SDimitry Andric // Free all the task teams on the task team free list.
40430b57cec5SDimitry Andric // Should only be done during library shutdown.
40440b57cec5SDimitry Andric // Cannot do anything that needs a thread structure or gtid since they are
40450b57cec5SDimitry Andric // already gone.
40460b57cec5SDimitry Andric void __kmp_reap_task_teams(void) {
40470b57cec5SDimitry Andric   kmp_task_team_t *task_team;
40480b57cec5SDimitry Andric 
40490b57cec5SDimitry Andric   if (TCR_PTR(__kmp_free_task_teams) != NULL) {
40500b57cec5SDimitry Andric     // Free all task_teams on the free list
40510b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
40520b57cec5SDimitry Andric     while ((task_team = __kmp_free_task_teams) != NULL) {
40530b57cec5SDimitry Andric       __kmp_free_task_teams = task_team->tt.tt_next;
40540b57cec5SDimitry Andric       task_team->tt.tt_next = NULL;
40550b57cec5SDimitry Andric 
40560b57cec5SDimitry Andric       // Free threads_data if necessary
40570b57cec5SDimitry Andric       if (task_team->tt.tt_threads_data != NULL) {
40580b57cec5SDimitry Andric         __kmp_free_task_threads_data(task_team);
40590b57cec5SDimitry Andric       }
406081ad6265SDimitry Andric       if (task_team->tt.tt_task_pri_list != NULL) {
406181ad6265SDimitry Andric         __kmp_free_task_pri_list(task_team);
406281ad6265SDimitry Andric       }
40630b57cec5SDimitry Andric       __kmp_free(task_team);
40640b57cec5SDimitry Andric     }
40650b57cec5SDimitry Andric     __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
40660b57cec5SDimitry Andric   }
40670b57cec5SDimitry Andric }
40680b57cec5SDimitry Andric 
// View the array of two task team pointers as a pair of pointers:
//  1) a single task_team pointer
//  2) next pointer for stack
// Serial teams can create a stack of task teams for nested serial teams.
void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
  // Reinterpret team->t.t_task_team[2] as the head node of the stack.
  kmp_task_team_list_t *current =
      (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
  // Copy the head's contents into a freshly allocated node...
  kmp_task_team_list_t *node =
      (kmp_task_team_list_t *)__kmp_allocate(sizeof(kmp_task_team_list_t));
  node->task_team = current->task_team;
  node->next = current->next;
  // ...then clear the head so the nested serial level starts with no task
  // team; the thread's cached th_task_team pointer is cleared to match.
  thread->th.th_task_team = current->task_team = NULL;
  current->next = node;
}
4084*0fca6ea1SDimitry Andric 
// Serial team pops a task team off the stack
// Inverse of __kmp_push_task_team_node: frees the current level's task team
// (if any) and restores the saved one from the next stack node.
void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
  // The stack head lives in team->t.t_task_team[2], reinterpreted as a node.
  kmp_task_team_list_t *current =
      (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
  if (current->task_team) {
    // Recycle the finished level's task team onto the global free list.
    __kmp_free_task_team(thread, current->task_team);
  }
  kmp_task_team_list_t *next = current->next;
  if (next) {
    // Restore the saved {task_team, next} pair into the head, free the
    // now-unlinked node, and refresh the thread's cached pointer.
    current->task_team = next->task_team;
    current->next = next->next;
    KMP_DEBUG_ASSERT(next != current);
    __kmp_free(next);
    thread->th.th_task_team = current->task_team;
  }
}
4102*0fca6ea1SDimitry Andric 
// __kmp_wait_to_unref_task_teams:
// Some threads could still be in the fork barrier release code, possibly
// trying to steal tasks.  Wait for each thread to unreference its task team.
// Spins (with yield/backoff) over the thread pool until every pooled thread
// has cleared its th_task_team pointer.
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  kmp_uint64 time;
  int done;

  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);

  for (;;) {
    done = TRUE;

    // TODO: GEH - this may be is wrong because some sync would be necessary
    // in case threads are added to the pool during the traversal. Need to
    // verify that lock for thread pool is held when calling this routine.
    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
         thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
      DWORD exit_val;
#endif
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
#if KMP_OS_WINDOWS
      // TODO: GEH - add this check for Linux* OS / OS X* as well?
      // A dead thread can never unreference its task team; clear it for it.
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        thread->th.th_task_team = NULL;
        continue;
      }
#endif

      done = FALSE; // Because th_task_team pointer is not NULL for this thread

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        void *sleep_loc;
        // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(thread);
        }
      }
    }
    if (done) {
      break;
    }

    // If oversubscribed or have waited a bit, yield.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  }
}
41660b57cec5SDimitry Andric 
// __kmp_task_team_setup:  Create a task_team for the current team, but use
// an already created, unused one if it already exists.
// Parallel teams keep two task teams (t_task_team[0/1]) and alternate between
// them by th_task_state parity across barriers; serial/root teams use slot 0
// plus a stack for nested serial levels.
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // For the serial and root teams, setup the first task team pointer to point
  // to task team. The other pointer is a stack of task teams from previous
  // serial levels.
  if (team == this_thr->th.th_serial_team ||
      team == this_thr->th.th_root->r.r_root_team) {
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    if (team->t.t_task_team[0] == NULL) {
      team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(
          20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
               " for serial/root team %p\n",
               __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));

    } else
      // Recycle the existing struct; just reset its per-region state.
      __kmp_task_team_init(team->t.t_task_team[0], team);
    return;
  }

  // If this task_team hasn't been created yet, allocate it. It will be used in
  // the region after the next.
  // If it exists, it is the current task team and shouldn't be touched yet as
  // it may still be in use.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
                  " for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
                  this_thr->th.th_task_state));
  }

  // After threads exit the release, they will call sync, and then point to this
  // other task_team; make sure it is allocated and properly initialized. As
  // threads spin in the barrier release phase, they will continue to use the
  // previous task_team struct(above), until they receive the signal to stop
  // checking for tasks (they can't safely reference the kmp_team_t struct,
  // which could be reallocated by the primary thread).
  int other_team = 1 - this_thr->th.th_task_state;
  KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
  if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
    team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
                  "task_team %p for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[other_team], team->t.t_id, other_team));
  } else { // Leave the old task team struct in place for the upcoming region;
    // adjust as needed
    kmp_task_team_t *task_team = team->t.t_task_team[other_team];
    __kmp_task_team_init(task_team, team);
    // if team size has changed, the first thread to enable tasking will
    // realloc threads_data if necessary
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
                  "%p for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[other_team], team->t.t_id, other_team));
  }

  // For regular thread, task enabling should be called when the task is going
  // to be pushed to a dequeue. However, for the hidden helper thread, we need
  // it ahead of time so that some operations can be performed without race
  // condition.
  if (this_thr == __kmp_hidden_helper_main_thread) {
    for (int i = 0; i < 2; ++i) {
      kmp_task_team_t *task_team = team->t.t_task_team[i];
      if (KMP_TASKING_ENABLED(task_team)) {
        continue;
      }
      __kmp_enable_tasking(task_team, this_thr);
      // Pre-allocate a deque for every hidden helper thread as well.
      for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
        kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
        if (thread_data->td.td_deque == NULL) {
          __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
        }
      }
    }
  }
}
42500b57cec5SDimitry Andric 
42510b57cec5SDimitry Andric // __kmp_task_team_sync: Propagation of task team data from team to threads
42520b57cec5SDimitry Andric // which happens just after the release phase of a team barrier.  This may be
4253*0fca6ea1SDimitry Andric // called by any thread. This is not called for serial or root teams.
42540b57cec5SDimitry Andric void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
42550b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4256*0fca6ea1SDimitry Andric   KMP_DEBUG_ASSERT(team != this_thr->th.th_serial_team);
4257*0fca6ea1SDimitry Andric   KMP_DEBUG_ASSERT(team != this_thr->th.th_root->r.r_root_team);
42580b57cec5SDimitry Andric 
42590b57cec5SDimitry Andric   // Toggle the th_task_state field, to switch which task_team this thread
42600b57cec5SDimitry Andric   // refers to
4261e8d8bef9SDimitry Andric   this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
4262e8d8bef9SDimitry Andric 
42630b57cec5SDimitry Andric   // It is now safe to propagate the task team pointer from the team struct to
42640b57cec5SDimitry Andric   // the current thread.
42650b57cec5SDimitry Andric   TCW_PTR(this_thr->th.th_task_team,
42660b57cec5SDimitry Andric           team->t.t_task_team[this_thr->th.th_task_state]);
42670b57cec5SDimitry Andric   KA_TRACE(20,
42680b57cec5SDimitry Andric            ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
42690b57cec5SDimitry Andric             "%p from Team #%d (parity=%d)\n",
42700b57cec5SDimitry Andric             __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
4271fe6060f1SDimitry Andric             team->t.t_id, this_thr->th.th_task_state));
42720b57cec5SDimitry Andric }
42730b57cec5SDimitry Andric 
4274fe6060f1SDimitry Andric // __kmp_task_team_wait: Primary thread waits for outstanding tasks after the
4275*0fca6ea1SDimitry Andric // barrier gather phase. Only called by the primary thread.
42760b57cec5SDimitry Andric //
42770b57cec5SDimitry Andric // wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
4278fe6060f1SDimitry Andric // by passing in 0 optionally as the last argument. When wait is zero, primary
42790b57cec5SDimitry Andric // thread does not wait for unfinished_threads to reach 0.
42800b57cec5SDimitry Andric void __kmp_task_team_wait(
42810b57cec5SDimitry Andric     kmp_info_t *this_thr,
42820b57cec5SDimitry Andric     kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
42830b57cec5SDimitry Andric   kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
42840b57cec5SDimitry Andric 
42850b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
42860b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
42870b57cec5SDimitry Andric 
42880b57cec5SDimitry Andric   if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
42890b57cec5SDimitry Andric     if (wait) {
4290fe6060f1SDimitry Andric       KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
42910b57cec5SDimitry Andric                     "(for unfinished_threads to reach 0) on task_team = %p\n",
42920b57cec5SDimitry Andric                     __kmp_gtid_from_thread(this_thr), task_team));
42930b57cec5SDimitry Andric       // Worker threads may have dropped through to release phase, but could
42940b57cec5SDimitry Andric       // still be executing tasks. Wait here for tasks to complete. To avoid
4295fe6060f1SDimitry Andric       // memory contention, only primary thread checks termination condition.
4296e8d8bef9SDimitry Andric       kmp_flag_32<false, false> flag(
4297e8d8bef9SDimitry Andric           RCAST(std::atomic<kmp_uint32> *,
42980b57cec5SDimitry Andric                 &task_team->tt.tt_unfinished_threads),
42990b57cec5SDimitry Andric           0U);
43000b57cec5SDimitry Andric       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
43010b57cec5SDimitry Andric     }
43020b57cec5SDimitry Andric     // Deactivate the old task team, so that the worker threads will stop
43030b57cec5SDimitry Andric     // referencing it while spinning.
43040b57cec5SDimitry Andric     KA_TRACE(
43050b57cec5SDimitry Andric         20,
4306fe6060f1SDimitry Andric         ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
43070b57cec5SDimitry Andric          "setting active to false, setting local and team's pointer to NULL\n",
43080b57cec5SDimitry Andric          __kmp_gtid_from_thread(this_thr), task_team));
43090b57cec5SDimitry Andric     TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
431004eeddc0SDimitry Andric     TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
43110b57cec5SDimitry Andric     KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
43120b57cec5SDimitry Andric     TCW_SYNC_4(task_team->tt.tt_active, FALSE);
43130b57cec5SDimitry Andric     KMP_MB();
43140b57cec5SDimitry Andric 
43150b57cec5SDimitry Andric     TCW_PTR(this_thr->th.th_task_team, NULL);
43160b57cec5SDimitry Andric   }
43170b57cec5SDimitry Andric }
43180b57cec5SDimitry Andric 
43190b57cec5SDimitry Andric // __kmp_tasking_barrier:
4320e8d8bef9SDimitry Andric // This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
43210b57cec5SDimitry Andric // Internal function to execute all tasks prior to a regular barrier or a join
43220b57cec5SDimitry Andric // barrier. It is a full barrier itself, which unfortunately turns regular
43230b57cec5SDimitry Andric // barriers into double barriers and join barriers into 1 1/2 barriers.
43240b57cec5SDimitry Andric void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
43250b57cec5SDimitry Andric   std::atomic<kmp_uint32> *spin = RCAST(
43260b57cec5SDimitry Andric       std::atomic<kmp_uint32> *,
43270b57cec5SDimitry Andric       &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
43280b57cec5SDimitry Andric   int flag = FALSE;
43290b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
43300b57cec5SDimitry Andric 
43310b57cec5SDimitry Andric #if USE_ITT_BUILD
43320b57cec5SDimitry Andric   KMP_FSYNC_SPIN_INIT(spin, NULL);
43330b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
4334e8d8bef9SDimitry Andric   kmp_flag_32<false, false> spin_flag(spin, 0U);
43350b57cec5SDimitry Andric   while (!spin_flag.execute_tasks(thread, gtid, TRUE,
43360b57cec5SDimitry Andric                                   &flag USE_ITT_BUILD_ARG(NULL), 0)) {
43370b57cec5SDimitry Andric #if USE_ITT_BUILD
43380b57cec5SDimitry Andric     // TODO: What about itt_sync_obj??
43390b57cec5SDimitry Andric     KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
43400b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
43410b57cec5SDimitry Andric 
43420b57cec5SDimitry Andric     if (TCR_4(__kmp_global.g.g_done)) {
43430b57cec5SDimitry Andric       if (__kmp_global.g.g_abort)
43440b57cec5SDimitry Andric         __kmp_abort_thread();
43450b57cec5SDimitry Andric       break;
43460b57cec5SDimitry Andric     }
43470b57cec5SDimitry Andric     KMP_YIELD(TRUE);
43480b57cec5SDimitry Andric   }
43490b57cec5SDimitry Andric #if USE_ITT_BUILD
43500b57cec5SDimitry Andric   KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
43510b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
43520b57cec5SDimitry Andric }
43530b57cec5SDimitry Andric 
43540b57cec5SDimitry Andric // __kmp_give_task puts a task into a given thread queue if:
43550b57cec5SDimitry Andric //  - the queue for that thread was created
43560b57cec5SDimitry Andric //  - there's space in that queue
43570b57cec5SDimitry Andric // Because of this, __kmp_push_task needs to check if there's space after
43580b57cec5SDimitry Andric // getting the lock
43590b57cec5SDimitry Andric static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
43600b57cec5SDimitry Andric                             kmp_int32 pass) {
43610b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
43620b57cec5SDimitry Andric   kmp_task_team_t *task_team = taskdata->td_task_team;
43630b57cec5SDimitry Andric 
43640b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
43650b57cec5SDimitry Andric                 taskdata, tid));
43660b57cec5SDimitry Andric 
43670b57cec5SDimitry Andric   // If task_team is NULL something went really bad...
43680b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task_team != NULL);
43690b57cec5SDimitry Andric 
43700b57cec5SDimitry Andric   bool result = false;
43710b57cec5SDimitry Andric   kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
43720b57cec5SDimitry Andric 
43730b57cec5SDimitry Andric   if (thread_data->td.td_deque == NULL) {
43740b57cec5SDimitry Andric     // There's no queue in this thread, go find another one
43750b57cec5SDimitry Andric     // We're guaranteed that at least one thread has a queue
43760b57cec5SDimitry Andric     KA_TRACE(30,
43770b57cec5SDimitry Andric              ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
43780b57cec5SDimitry Andric               tid, taskdata));
43790b57cec5SDimitry Andric     return result;
43800b57cec5SDimitry Andric   }
43810b57cec5SDimitry Andric 
43820b57cec5SDimitry Andric   if (TCR_4(thread_data->td.td_deque_ntasks) >=
43830b57cec5SDimitry Andric       TASK_DEQUE_SIZE(thread_data->td)) {
43840b57cec5SDimitry Andric     KA_TRACE(
43850b57cec5SDimitry Andric         30,
43860b57cec5SDimitry Andric         ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
43870b57cec5SDimitry Andric          taskdata, tid));
43880b57cec5SDimitry Andric 
43890b57cec5SDimitry Andric     // if this deque is bigger than the pass ratio give a chance to another
43900b57cec5SDimitry Andric     // thread
43910b57cec5SDimitry Andric     if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
43920b57cec5SDimitry Andric       return result;
43930b57cec5SDimitry Andric 
43940b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
43955ffd83dbSDimitry Andric     if (TCR_4(thread_data->td.td_deque_ntasks) >=
43965ffd83dbSDimitry Andric         TASK_DEQUE_SIZE(thread_data->td)) {
43975ffd83dbSDimitry Andric       // expand deque to push the task which is not allowed to execute
43980b57cec5SDimitry Andric       __kmp_realloc_task_deque(thread, thread_data);
43995ffd83dbSDimitry Andric     }
44000b57cec5SDimitry Andric 
44010b57cec5SDimitry Andric   } else {
44020b57cec5SDimitry Andric 
44030b57cec5SDimitry Andric     __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
44040b57cec5SDimitry Andric 
44050b57cec5SDimitry Andric     if (TCR_4(thread_data->td.td_deque_ntasks) >=
44060b57cec5SDimitry Andric         TASK_DEQUE_SIZE(thread_data->td)) {
44070b57cec5SDimitry Andric       KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
44080b57cec5SDimitry Andric                     "thread %d.\n",
44090b57cec5SDimitry Andric                     taskdata, tid));
44100b57cec5SDimitry Andric 
44110b57cec5SDimitry Andric       // if this deque is bigger than the pass ratio give a chance to another
44120b57cec5SDimitry Andric       // thread
44130b57cec5SDimitry Andric       if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
44140b57cec5SDimitry Andric         goto release_and_exit;
44150b57cec5SDimitry Andric 
44160b57cec5SDimitry Andric       __kmp_realloc_task_deque(thread, thread_data);
44170b57cec5SDimitry Andric     }
44180b57cec5SDimitry Andric   }
44190b57cec5SDimitry Andric 
44200b57cec5SDimitry Andric   // lock is held here, and there is space in the deque
44210b57cec5SDimitry Andric 
44220b57cec5SDimitry Andric   thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
44230b57cec5SDimitry Andric   // Wrap index.
44240b57cec5SDimitry Andric   thread_data->td.td_deque_tail =
44250b57cec5SDimitry Andric       (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
44260b57cec5SDimitry Andric   TCW_4(thread_data->td.td_deque_ntasks,
44270b57cec5SDimitry Andric         TCR_4(thread_data->td.td_deque_ntasks) + 1);
44280b57cec5SDimitry Andric 
44290b57cec5SDimitry Andric   result = true;
44300b57cec5SDimitry Andric   KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
44310b57cec5SDimitry Andric                 taskdata, tid));
44320b57cec5SDimitry Andric 
44330b57cec5SDimitry Andric release_and_exit:
44340b57cec5SDimitry Andric   __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
44350b57cec5SDimitry Andric 
44360b57cec5SDimitry Andric   return result;
44370b57cec5SDimitry Andric }
44380b57cec5SDimitry Andric 
4439fe6060f1SDimitry Andric #define PROXY_TASK_FLAG 0x40000000
44400b57cec5SDimitry Andric /* The finish of the proxy tasks is divided in two pieces:
44410b57cec5SDimitry Andric     - the top half is the one that can be done from a thread outside the team
44420b57cec5SDimitry Andric     - the bottom half must be run from a thread within the team
44430b57cec5SDimitry Andric 
44440b57cec5SDimitry Andric    In order to run the bottom half the task gets queued back into one of the
44450b57cec5SDimitry Andric    threads of the team. Once the td_incomplete_child_task counter of the parent
44460b57cec5SDimitry Andric    is decremented the threads can leave the barriers. So, the bottom half needs
44470b57cec5SDimitry Andric    to be queued before the counter is decremented. The top half is therefore
44480b57cec5SDimitry Andric    divided in two parts:
44490b57cec5SDimitry Andric     - things that can be run before queuing the bottom half
44500b57cec5SDimitry Andric     - things that must be run after queuing the bottom half
44510b57cec5SDimitry Andric 
44520b57cec5SDimitry Andric    This creates a second race as the bottom half can free the task before the
44530b57cec5SDimitry Andric    second top half is executed. To avoid this we use the
44540b57cec5SDimitry Andric    td_incomplete_child_task of the proxy task to synchronize the top and bottom
44550b57cec5SDimitry Andric    half. */
44560b57cec5SDimitry Andric static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
44570b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
44580b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
44590b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
44600b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
44610b57cec5SDimitry Andric 
44620b57cec5SDimitry Andric   taskdata->td_flags.complete = 1; // mark the task as completed
446306c3fb27SDimitry Andric #if OMPX_TASKGRAPH
446406c3fb27SDimitry Andric   taskdata->td_flags.onced = 1;
446506c3fb27SDimitry Andric #endif
44660b57cec5SDimitry Andric 
44670b57cec5SDimitry Andric   if (taskdata->td_taskgroup)
44680b57cec5SDimitry Andric     KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
44690b57cec5SDimitry Andric 
44700b57cec5SDimitry Andric   // Create an imaginary children for this task so the bottom half cannot
44710b57cec5SDimitry Andric   // release the task before we have completed the second top half
4472fe6060f1SDimitry Andric   KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
44730b57cec5SDimitry Andric }
44740b57cec5SDimitry Andric 
44750b57cec5SDimitry Andric static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4476349cc55cSDimitry Andric #if KMP_DEBUG
44770b57cec5SDimitry Andric   kmp_int32 children = 0;
44780b57cec5SDimitry Andric   // Predecrement simulated by "- 1" calculation
4479349cc55cSDimitry Andric   children = -1 +
4480349cc55cSDimitry Andric #endif
4481349cc55cSDimitry Andric       KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
44820b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(children >= 0);
44830b57cec5SDimitry Andric 
44840b57cec5SDimitry Andric   // Remove the imaginary children
4485fe6060f1SDimitry Andric   KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
44860b57cec5SDimitry Andric }
44870b57cec5SDimitry Andric 
44880b57cec5SDimitry Andric static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
44890b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
44900b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
44910b57cec5SDimitry Andric 
44920b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
44930b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
44940b57cec5SDimitry Andric                    1); // top half must run before bottom half
44950b57cec5SDimitry Andric 
44960b57cec5SDimitry Andric   // We need to wait to make sure the top half is finished
44970b57cec5SDimitry Andric   // Spinning here should be ok as this should happen quickly
4498fe6060f1SDimitry Andric   while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
4499fe6060f1SDimitry Andric           PROXY_TASK_FLAG) > 0)
45000b57cec5SDimitry Andric     ;
45010b57cec5SDimitry Andric 
45020b57cec5SDimitry Andric   __kmp_release_deps(gtid, taskdata);
45030b57cec5SDimitry Andric   __kmp_free_task_and_ancestors(gtid, taskdata, thread);
45040b57cec5SDimitry Andric }
45050b57cec5SDimitry Andric 
45060b57cec5SDimitry Andric /*!
45070b57cec5SDimitry Andric @ingroup TASKING
45080b57cec5SDimitry Andric @param gtid Global Thread ID of encountering thread
45090b57cec5SDimitry Andric @param ptask Task which execution is completed
45100b57cec5SDimitry Andric 
4511480093f4SDimitry Andric Execute the completion of a proxy task from a thread of that is part of the
45120b57cec5SDimitry Andric team. Run first and bottom halves directly.
45130b57cec5SDimitry Andric */
45140b57cec5SDimitry Andric void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
45150b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(ptask != NULL);
45160b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
45170b57cec5SDimitry Andric   KA_TRACE(
45180b57cec5SDimitry Andric       10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
45190b57cec5SDimitry Andric            gtid, taskdata));
4520e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
45210b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
45220b57cec5SDimitry Andric 
45230b57cec5SDimitry Andric   __kmp_first_top_half_finish_proxy(taskdata);
45240b57cec5SDimitry Andric   __kmp_second_top_half_finish_proxy(taskdata);
45250b57cec5SDimitry Andric   __kmp_bottom_half_finish_proxy(gtid, ptask);
45260b57cec5SDimitry Andric 
45270b57cec5SDimitry Andric   KA_TRACE(10,
45280b57cec5SDimitry Andric            ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
45290b57cec5SDimitry Andric             gtid, taskdata));
45300b57cec5SDimitry Andric }
45310b57cec5SDimitry Andric 
4532fe6060f1SDimitry Andric void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
4533fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(ptask != NULL);
4534fe6060f1SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4535fe6060f1SDimitry Andric 
4536fe6060f1SDimitry Andric   // Enqueue task to complete bottom half completion from a thread within the
4537fe6060f1SDimitry Andric   // corresponding team
4538fe6060f1SDimitry Andric   kmp_team_t *team = taskdata->td_team;
4539fe6060f1SDimitry Andric   kmp_int32 nthreads = team->t.t_nproc;
4540fe6060f1SDimitry Andric   kmp_info_t *thread;
4541fe6060f1SDimitry Andric 
4542fe6060f1SDimitry Andric   // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
4543fe6060f1SDimitry Andric   // but we cannot use __kmp_get_random here
4544349cc55cSDimitry Andric   kmp_int32 start_k = start % nthreads;
4545fe6060f1SDimitry Andric   kmp_int32 pass = 1;
4546fe6060f1SDimitry Andric   kmp_int32 k = start_k;
4547fe6060f1SDimitry Andric 
4548fe6060f1SDimitry Andric   do {
4549fe6060f1SDimitry Andric     // For now we're just linearly trying to find a thread
4550fe6060f1SDimitry Andric     thread = team->t.t_threads[k];
4551fe6060f1SDimitry Andric     k = (k + 1) % nthreads;
4552fe6060f1SDimitry Andric 
4553fe6060f1SDimitry Andric     // we did a full pass through all the threads
4554fe6060f1SDimitry Andric     if (k == start_k)
4555fe6060f1SDimitry Andric       pass = pass << 1;
4556fe6060f1SDimitry Andric 
4557fe6060f1SDimitry Andric   } while (!__kmp_give_task(thread, k, ptask, pass));
455881ad6265SDimitry Andric 
455981ad6265SDimitry Andric   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
456081ad6265SDimitry Andric     // awake at least one thread to execute given task
456181ad6265SDimitry Andric     for (int i = 0; i < nthreads; ++i) {
456281ad6265SDimitry Andric       thread = team->t.t_threads[i];
456381ad6265SDimitry Andric       if (thread->th.th_sleep_loc != NULL) {
456481ad6265SDimitry Andric         __kmp_null_resume_wrapper(thread);
456581ad6265SDimitry Andric         break;
456681ad6265SDimitry Andric       }
456781ad6265SDimitry Andric     }
456881ad6265SDimitry Andric   }
4569fe6060f1SDimitry Andric }
4570fe6060f1SDimitry Andric 
45710b57cec5SDimitry Andric /*!
45720b57cec5SDimitry Andric @ingroup TASKING
45730b57cec5SDimitry Andric @param ptask Task which execution is completed
45740b57cec5SDimitry Andric 
4575480093f4SDimitry Andric Execute the completion of a proxy task from a thread that could not belong to
45760b57cec5SDimitry Andric the team.
45770b57cec5SDimitry Andric */
45780b57cec5SDimitry Andric void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
45790b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(ptask != NULL);
45800b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
45810b57cec5SDimitry Andric 
45820b57cec5SDimitry Andric   KA_TRACE(
45830b57cec5SDimitry Andric       10,
45840b57cec5SDimitry Andric       ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
45850b57cec5SDimitry Andric        taskdata));
45860b57cec5SDimitry Andric 
45870b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
45880b57cec5SDimitry Andric 
45890b57cec5SDimitry Andric   __kmp_first_top_half_finish_proxy(taskdata);
45900b57cec5SDimitry Andric 
4591fe6060f1SDimitry Andric   __kmpc_give_task(ptask);
45920b57cec5SDimitry Andric 
45930b57cec5SDimitry Andric   __kmp_second_top_half_finish_proxy(taskdata);
45940b57cec5SDimitry Andric 
45950b57cec5SDimitry Andric   KA_TRACE(
45960b57cec5SDimitry Andric       10,
45970b57cec5SDimitry Andric       ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
45980b57cec5SDimitry Andric        taskdata));
45990b57cec5SDimitry Andric }
46000b57cec5SDimitry Andric 
46010b57cec5SDimitry Andric kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
46020b57cec5SDimitry Andric                                                 kmp_task_t *task) {
46030b57cec5SDimitry Andric   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
46040b57cec5SDimitry Andric   if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
46050b57cec5SDimitry Andric     td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
46060b57cec5SDimitry Andric     td->td_allow_completion_event.ed.task = task;
46070b57cec5SDimitry Andric     __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
46080b57cec5SDimitry Andric   }
46090b57cec5SDimitry Andric   return &td->td_allow_completion_event;
46100b57cec5SDimitry Andric }
46110b57cec5SDimitry Andric 
46120b57cec5SDimitry Andric void __kmp_fulfill_event(kmp_event_t *event) {
46130b57cec5SDimitry Andric   if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
46140b57cec5SDimitry Andric     kmp_task_t *ptask = event->ed.task;
46150b57cec5SDimitry Andric     kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
46160b57cec5SDimitry Andric     bool detached = false;
46170b57cec5SDimitry Andric     int gtid = __kmp_get_gtid();
46180b57cec5SDimitry Andric 
46195ffd83dbSDimitry Andric     // The associated task might have completed or could be completing at this
46200b57cec5SDimitry Andric     // point.
46210b57cec5SDimitry Andric     // We need to take the lock to avoid races
46220b57cec5SDimitry Andric     __kmp_acquire_tas_lock(&event->lock, gtid);
46235ffd83dbSDimitry Andric     if (taskdata->td_flags.proxy == TASK_PROXY) {
46240b57cec5SDimitry Andric       detached = true;
46255ffd83dbSDimitry Andric     } else {
46265ffd83dbSDimitry Andric #if OMPT_SUPPORT
46275ffd83dbSDimitry Andric       // The OMPT event must occur under mutual exclusion,
46285ffd83dbSDimitry Andric       // otherwise the tool might access ptask after free
46295ffd83dbSDimitry Andric       if (UNLIKELY(ompt_enabled.enabled))
46305ffd83dbSDimitry Andric         __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
46315ffd83dbSDimitry Andric #endif
46325ffd83dbSDimitry Andric     }
46330b57cec5SDimitry Andric     event->type = KMP_EVENT_UNINITIALIZED;
46340b57cec5SDimitry Andric     __kmp_release_tas_lock(&event->lock, gtid);
46350b57cec5SDimitry Andric 
46360b57cec5SDimitry Andric     if (detached) {
46375ffd83dbSDimitry Andric #if OMPT_SUPPORT
46385ffd83dbSDimitry Andric       // We free ptask afterwards and know the task is finished,
46395ffd83dbSDimitry Andric       // so locking is not necessary
46405ffd83dbSDimitry Andric       if (UNLIKELY(ompt_enabled.enabled))
46415ffd83dbSDimitry Andric         __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
46425ffd83dbSDimitry Andric #endif
46430b57cec5SDimitry Andric       // If the task detached complete the proxy task
46440b57cec5SDimitry Andric       if (gtid >= 0) {
46450b57cec5SDimitry Andric         kmp_team_t *team = taskdata->td_team;
46460b57cec5SDimitry Andric         kmp_info_t *thread = __kmp_get_thread();
46470b57cec5SDimitry Andric         if (thread->th.th_team == team) {
46480b57cec5SDimitry Andric           __kmpc_proxy_task_completed(gtid, ptask);
46490b57cec5SDimitry Andric           return;
46500b57cec5SDimitry Andric         }
46510b57cec5SDimitry Andric       }
46520b57cec5SDimitry Andric 
46530b57cec5SDimitry Andric       // fallback
46540b57cec5SDimitry Andric       __kmpc_proxy_task_completed_ooo(ptask);
46550b57cec5SDimitry Andric     }
46560b57cec5SDimitry Andric   }
46570b57cec5SDimitry Andric }
46580b57cec5SDimitry Andric 
46590b57cec5SDimitry Andric // __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
46600b57cec5SDimitry Andric // for taskloop
46610b57cec5SDimitry Andric //
46620b57cec5SDimitry Andric // thread:   allocating thread
46630b57cec5SDimitry Andric // task_src: pointer to source task to be duplicated
466406c3fb27SDimitry Andric // taskloop_recur: used only when dealing with taskgraph,
466506c3fb27SDimitry Andric //      indicating whether we need to update task->td_task_id
46660b57cec5SDimitry Andric // returns:  a pointer to the allocated kmp_task_t structure (task).
466706c3fb27SDimitry Andric kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
466806c3fb27SDimitry Andric #if OMPX_TASKGRAPH
466906c3fb27SDimitry Andric                                  , int taskloop_recur
467006c3fb27SDimitry Andric #endif
467106c3fb27SDimitry Andric ) {
46720b57cec5SDimitry Andric   kmp_task_t *task;
46730b57cec5SDimitry Andric   kmp_taskdata_t *taskdata;
46745ffd83dbSDimitry Andric   kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
46755ffd83dbSDimitry Andric   kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
46760b57cec5SDimitry Andric   size_t shareds_offset;
46770b57cec5SDimitry Andric   size_t task_size;
46780b57cec5SDimitry Andric 
46790b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
46800b57cec5SDimitry Andric                 task_src));
46810b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
46820b57cec5SDimitry Andric                    TASK_FULL); // it should not be proxy task
46830b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
46840b57cec5SDimitry Andric   task_size = taskdata_src->td_size_alloc;
46850b57cec5SDimitry Andric 
46860b57cec5SDimitry Andric   // Allocate a kmp_taskdata_t block and a kmp_task_t block.
46870b57cec5SDimitry Andric   KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
46880b57cec5SDimitry Andric                 task_size));
46890b57cec5SDimitry Andric #if USE_FAST_MEMORY
46900b57cec5SDimitry Andric   taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
46910b57cec5SDimitry Andric #else
46920b57cec5SDimitry Andric   taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
46930b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */
46940b57cec5SDimitry Andric   KMP_MEMCPY(taskdata, taskdata_src, task_size);
46950b57cec5SDimitry Andric 
46960b57cec5SDimitry Andric   task = KMP_TASKDATA_TO_TASK(taskdata);
46970b57cec5SDimitry Andric 
46980b57cec5SDimitry Andric   // Initialize new task (only specific fields not affected by memcpy)
469906c3fb27SDimitry Andric #if OMPX_TASKGRAPH
470006c3fb27SDimitry Andric   if (!taskdata->is_taskgraph || taskloop_recur)
47010b57cec5SDimitry Andric     taskdata->td_task_id = KMP_GEN_TASK_ID();
470206c3fb27SDimitry Andric   else if (taskdata->is_taskgraph &&
470306c3fb27SDimitry Andric            __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
470406c3fb27SDimitry Andric     taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
470506c3fb27SDimitry Andric #else
470606c3fb27SDimitry Andric   taskdata->td_task_id = KMP_GEN_TASK_ID();
470706c3fb27SDimitry Andric #endif
47080b57cec5SDimitry Andric   if (task->shareds != NULL) { // need setup shareds pointer
47090b57cec5SDimitry Andric     shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
47100b57cec5SDimitry Andric     task->shareds = &((char *)taskdata)[shareds_offset];
47110b57cec5SDimitry Andric     KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
47120b57cec5SDimitry Andric                      0);
47130b57cec5SDimitry Andric   }
47140b57cec5SDimitry Andric   taskdata->td_alloc_thread = thread;
47150b57cec5SDimitry Andric   taskdata->td_parent = parent_task;
47165ffd83dbSDimitry Andric   // task inherits the taskgroup from the parent task
47175ffd83dbSDimitry Andric   taskdata->td_taskgroup = parent_task->td_taskgroup;
47185ffd83dbSDimitry Andric   // tied task needs to initialize the td_last_tied at creation,
47195ffd83dbSDimitry Andric   // untied one does this when it is scheduled for execution
47205ffd83dbSDimitry Andric   if (taskdata->td_flags.tiedness == TASK_TIED)
47215ffd83dbSDimitry Andric     taskdata->td_last_tied = taskdata;
47220b57cec5SDimitry Andric 
47230b57cec5SDimitry Andric   // Only need to keep track of child task counts if team parallel and tasking
47240b57cec5SDimitry Andric   // not serialized
47250b57cec5SDimitry Andric   if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
47260b57cec5SDimitry Andric     KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
47270b57cec5SDimitry Andric     if (parent_task->td_taskgroup)
47280b57cec5SDimitry Andric       KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
47290b57cec5SDimitry Andric     // Only need to keep track of allocated child tasks for explicit tasks since
47300b57cec5SDimitry Andric     // implicit not deallocated
47310b57cec5SDimitry Andric     if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
47320b57cec5SDimitry Andric       KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
47330b57cec5SDimitry Andric   }
47340b57cec5SDimitry Andric 
47350b57cec5SDimitry Andric   KA_TRACE(20,
47360b57cec5SDimitry Andric            ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
47370b57cec5SDimitry Andric             thread, taskdata, taskdata->td_parent));
47380b57cec5SDimitry Andric #if OMPT_SUPPORT
47390b57cec5SDimitry Andric   if (UNLIKELY(ompt_enabled.enabled))
47400b57cec5SDimitry Andric     __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
47410b57cec5SDimitry Andric #endif
47420b57cec5SDimitry Andric   return task;
47430b57cec5SDimitry Andric }
47440b57cec5SDimitry Andric 
47450b57cec5SDimitry Andric // Routine optionally generated by the compiler for setting the lastprivate flag
47460b57cec5SDimitry Andric // and calling needed constructors for private/firstprivate objects
47470b57cec5SDimitry Andric // (used to form taskloop tasks from pattern task)
47480b57cec5SDimitry Andric // Parameters: dest task, src task, lastprivate flag.
47490b57cec5SDimitry Andric typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
47500b57cec5SDimitry Andric 
47510b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
47520b57cec5SDimitry Andric 
47530b57cec5SDimitry Andric // class to encapsulate manipulating loop bounds in a taskloop task.
47540b57cec5SDimitry Andric // this abstracts away the Intel vs GOMP taskloop interface for setting/getting
47550b57cec5SDimitry Andric // the loop bound variables.
47560b57cec5SDimitry Andric class kmp_taskloop_bounds_t {
47570b57cec5SDimitry Andric   kmp_task_t *task;
47580b57cec5SDimitry Andric   const kmp_taskdata_t *taskdata;
47590b57cec5SDimitry Andric   size_t lower_offset;
47600b57cec5SDimitry Andric   size_t upper_offset;
47610b57cec5SDimitry Andric 
47620b57cec5SDimitry Andric public:
47630b57cec5SDimitry Andric   kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
47640b57cec5SDimitry Andric       : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
47650b57cec5SDimitry Andric         lower_offset((char *)lb - (char *)task),
47660b57cec5SDimitry Andric         upper_offset((char *)ub - (char *)task) {
47670b57cec5SDimitry Andric     KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
47680b57cec5SDimitry Andric     KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
47690b57cec5SDimitry Andric   }
47700b57cec5SDimitry Andric   kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
47710b57cec5SDimitry Andric       : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
47720b57cec5SDimitry Andric         lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
47730b57cec5SDimitry Andric   size_t get_lower_offset() const { return lower_offset; }
47740b57cec5SDimitry Andric   size_t get_upper_offset() const { return upper_offset; }
47750b57cec5SDimitry Andric   kmp_uint64 get_lb() const {
47760b57cec5SDimitry Andric     kmp_int64 retval;
47770b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
47780b57cec5SDimitry Andric     // Intel task just returns the lower bound normally
47790b57cec5SDimitry Andric     if (!taskdata->td_flags.native) {
47800b57cec5SDimitry Andric       retval = *(kmp_int64 *)((char *)task + lower_offset);
47810b57cec5SDimitry Andric     } else {
47820b57cec5SDimitry Andric       // GOMP task has to take into account the sizeof(long)
47830b57cec5SDimitry Andric       if (taskdata->td_size_loop_bounds == 4) {
47840b57cec5SDimitry Andric         kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
47850b57cec5SDimitry Andric         retval = (kmp_int64)*lb;
47860b57cec5SDimitry Andric       } else {
47870b57cec5SDimitry Andric         kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
47880b57cec5SDimitry Andric         retval = (kmp_int64)*lb;
47890b57cec5SDimitry Andric       }
47900b57cec5SDimitry Andric     }
47910b57cec5SDimitry Andric #else
4792fe6060f1SDimitry Andric     (void)taskdata;
47930b57cec5SDimitry Andric     retval = *(kmp_int64 *)((char *)task + lower_offset);
47940b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT)
47950b57cec5SDimitry Andric     return retval;
47960b57cec5SDimitry Andric   }
47970b57cec5SDimitry Andric   kmp_uint64 get_ub() const {
47980b57cec5SDimitry Andric     kmp_int64 retval;
47990b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
48000b57cec5SDimitry Andric     // Intel task just returns the upper bound normally
48010b57cec5SDimitry Andric     if (!taskdata->td_flags.native) {
48020b57cec5SDimitry Andric       retval = *(kmp_int64 *)((char *)task + upper_offset);
48030b57cec5SDimitry Andric     } else {
48040b57cec5SDimitry Andric       // GOMP task has to take into account the sizeof(long)
48050b57cec5SDimitry Andric       if (taskdata->td_size_loop_bounds == 4) {
48060b57cec5SDimitry Andric         kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
48070b57cec5SDimitry Andric         retval = (kmp_int64)*ub;
48080b57cec5SDimitry Andric       } else {
48090b57cec5SDimitry Andric         kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
48100b57cec5SDimitry Andric         retval = (kmp_int64)*ub;
48110b57cec5SDimitry Andric       }
48120b57cec5SDimitry Andric     }
48130b57cec5SDimitry Andric #else
48140b57cec5SDimitry Andric     retval = *(kmp_int64 *)((char *)task + upper_offset);
48150b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT)
48160b57cec5SDimitry Andric     return retval;
48170b57cec5SDimitry Andric   }
48180b57cec5SDimitry Andric   void set_lb(kmp_uint64 lb) {
48190b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
48200b57cec5SDimitry Andric     // Intel task just sets the lower bound normally
48210b57cec5SDimitry Andric     if (!taskdata->td_flags.native) {
48220b57cec5SDimitry Andric       *(kmp_uint64 *)((char *)task + lower_offset) = lb;
48230b57cec5SDimitry Andric     } else {
48240b57cec5SDimitry Andric       // GOMP task has to take into account the sizeof(long)
48250b57cec5SDimitry Andric       if (taskdata->td_size_loop_bounds == 4) {
48260b57cec5SDimitry Andric         kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
48270b57cec5SDimitry Andric         *lower = (kmp_uint32)lb;
48280b57cec5SDimitry Andric       } else {
48290b57cec5SDimitry Andric         kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
48300b57cec5SDimitry Andric         *lower = (kmp_uint64)lb;
48310b57cec5SDimitry Andric       }
48320b57cec5SDimitry Andric     }
48330b57cec5SDimitry Andric #else
48340b57cec5SDimitry Andric     *(kmp_uint64 *)((char *)task + lower_offset) = lb;
48350b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT)
48360b57cec5SDimitry Andric   }
48370b57cec5SDimitry Andric   void set_ub(kmp_uint64 ub) {
48380b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
48390b57cec5SDimitry Andric     // Intel task just sets the upper bound normally
48400b57cec5SDimitry Andric     if (!taskdata->td_flags.native) {
48410b57cec5SDimitry Andric       *(kmp_uint64 *)((char *)task + upper_offset) = ub;
48420b57cec5SDimitry Andric     } else {
48430b57cec5SDimitry Andric       // GOMP task has to take into account the sizeof(long)
48440b57cec5SDimitry Andric       if (taskdata->td_size_loop_bounds == 4) {
48450b57cec5SDimitry Andric         kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
48460b57cec5SDimitry Andric         *upper = (kmp_uint32)ub;
48470b57cec5SDimitry Andric       } else {
48480b57cec5SDimitry Andric         kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
48490b57cec5SDimitry Andric         *upper = (kmp_uint64)ub;
48500b57cec5SDimitry Andric       }
48510b57cec5SDimitry Andric     }
48520b57cec5SDimitry Andric #else
48530b57cec5SDimitry Andric     *(kmp_uint64 *)((char *)task + upper_offset) = ub;
48540b57cec5SDimitry Andric #endif // defined(KMP_GOMP_COMPAT)
48550b57cec5SDimitry Andric   }
48560b57cec5SDimitry Andric };
48570b57cec5SDimitry Andric 
48580b57cec5SDimitry Andric // __kmp_taskloop_linear: Start tasks of the taskloop linearly
48590b57cec5SDimitry Andric //
48600b57cec5SDimitry Andric // loc        Source location information
48610b57cec5SDimitry Andric // gtid       Global thread ID
48620b57cec5SDimitry Andric // task       Pattern task, exposes the loop iteration range
48630b57cec5SDimitry Andric // lb         Pointer to loop lower bound in task structure
48640b57cec5SDimitry Andric // ub         Pointer to loop upper bound in task structure
48650b57cec5SDimitry Andric // st         Loop stride
48660b57cec5SDimitry Andric // ub_glob    Global upper bound (used for lastprivate check)
48670b57cec5SDimitry Andric // num_tasks  Number of tasks to execute
48680b57cec5SDimitry Andric // grainsize  Number of loop iterations per task
48690b57cec5SDimitry Andric // extras     Number of chunks with grainsize+1 iterations
4870e8d8bef9SDimitry Andric // last_chunk Reduction of grainsize for last task
48710b57cec5SDimitry Andric // tc         Iterations count
48720b57cec5SDimitry Andric // task_dup   Tasks duplication routine
48730b57cec5SDimitry Andric // codeptr_ra Return address for OMPT events
48740b57cec5SDimitry Andric void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
48750b57cec5SDimitry Andric                            kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
48760b57cec5SDimitry Andric                            kmp_uint64 ub_glob, kmp_uint64 num_tasks,
48770b57cec5SDimitry Andric                            kmp_uint64 grainsize, kmp_uint64 extras,
4878e8d8bef9SDimitry Andric                            kmp_int64 last_chunk, kmp_uint64 tc,
48790b57cec5SDimitry Andric #if OMPT_SUPPORT
48800b57cec5SDimitry Andric                            void *codeptr_ra,
48810b57cec5SDimitry Andric #endif
48820b57cec5SDimitry Andric                            void *task_dup) {
48830b57cec5SDimitry Andric   KMP_COUNT_BLOCK(OMP_TASKLOOP);
48840b57cec5SDimitry Andric   KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
48850b57cec5SDimitry Andric   p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
48860b57cec5SDimitry Andric   // compiler provides global bounds here
48870b57cec5SDimitry Andric   kmp_taskloop_bounds_t task_bounds(task, lb, ub);
48880b57cec5SDimitry Andric   kmp_uint64 lower = task_bounds.get_lb();
48890b57cec5SDimitry Andric   kmp_uint64 upper = task_bounds.get_ub();
48900b57cec5SDimitry Andric   kmp_uint64 i;
48910b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
48920b57cec5SDimitry Andric   kmp_taskdata_t *current_task = thread->th.th_current_task;
48930b57cec5SDimitry Andric   kmp_task_t *next_task;
48940b57cec5SDimitry Andric   kmp_int32 lastpriv = 0;
48950b57cec5SDimitry Andric 
4896fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4897fe6060f1SDimitry Andric                              (last_chunk < 0 ? last_chunk : extras));
48980b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_tasks > extras);
48990b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_tasks > 0);
49000b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
4901e8d8bef9SDimitry Andric                 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4902e8d8bef9SDimitry Andric                 gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
4903e8d8bef9SDimitry Andric                 ub_glob, st, task_dup));
49040b57cec5SDimitry Andric 
49050b57cec5SDimitry Andric   // Launch num_tasks tasks, assign grainsize iterations each task
49060b57cec5SDimitry Andric   for (i = 0; i < num_tasks; ++i) {
49070b57cec5SDimitry Andric     kmp_uint64 chunk_minus_1;
49080b57cec5SDimitry Andric     if (extras == 0) {
49090b57cec5SDimitry Andric       chunk_minus_1 = grainsize - 1;
49100b57cec5SDimitry Andric     } else {
49110b57cec5SDimitry Andric       chunk_minus_1 = grainsize;
49120b57cec5SDimitry Andric       --extras; // first extras iterations get bigger chunk (grainsize+1)
49130b57cec5SDimitry Andric     }
49140b57cec5SDimitry Andric     upper = lower + st * chunk_minus_1;
4915e8d8bef9SDimitry Andric     if (upper > *ub) {
4916e8d8bef9SDimitry Andric       upper = *ub;
4917e8d8bef9SDimitry Andric     }
49180b57cec5SDimitry Andric     if (i == num_tasks - 1) {
49190b57cec5SDimitry Andric       // schedule the last task, set lastprivate flag if needed
49200b57cec5SDimitry Andric       if (st == 1) { // most common case
49210b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(upper == *ub);
49220b57cec5SDimitry Andric         if (upper == ub_glob)
49230b57cec5SDimitry Andric           lastpriv = 1;
49240b57cec5SDimitry Andric       } else if (st > 0) { // positive loop stride
49250b57cec5SDimitry Andric         KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
49260b57cec5SDimitry Andric         if ((kmp_uint64)st > ub_glob - upper)
49270b57cec5SDimitry Andric           lastpriv = 1;
49280b57cec5SDimitry Andric       } else { // negative loop stride
49290b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(upper + st < *ub);
49300b57cec5SDimitry Andric         if (upper - ub_glob < (kmp_uint64)(-st))
49310b57cec5SDimitry Andric           lastpriv = 1;
49320b57cec5SDimitry Andric       }
49330b57cec5SDimitry Andric     }
493406c3fb27SDimitry Andric 
493506c3fb27SDimitry Andric #if OMPX_TASKGRAPH
493606c3fb27SDimitry Andric     next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0);
493706c3fb27SDimitry Andric #else
49380b57cec5SDimitry Andric     next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
493906c3fb27SDimitry Andric #endif
494006c3fb27SDimitry Andric 
49410b57cec5SDimitry Andric     kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
49420b57cec5SDimitry Andric     kmp_taskloop_bounds_t next_task_bounds =
49430b57cec5SDimitry Andric         kmp_taskloop_bounds_t(next_task, task_bounds);
49440b57cec5SDimitry Andric 
49450b57cec5SDimitry Andric     // adjust task-specific bounds
49460b57cec5SDimitry Andric     next_task_bounds.set_lb(lower);
49470b57cec5SDimitry Andric     if (next_taskdata->td_flags.native) {
49480b57cec5SDimitry Andric       next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
49490b57cec5SDimitry Andric     } else {
49500b57cec5SDimitry Andric       next_task_bounds.set_ub(upper);
49510b57cec5SDimitry Andric     }
4952480093f4SDimitry Andric     if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates,
4953480093f4SDimitry Andric                            // etc.
49540b57cec5SDimitry Andric       ptask_dup(next_task, task, lastpriv);
49550b57cec5SDimitry Andric     KA_TRACE(40,
49560b57cec5SDimitry Andric              ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
49570b57cec5SDimitry Andric               "upper %lld stride %lld, (offsets %p %p)\n",
49580b57cec5SDimitry Andric               gtid, i, next_task, lower, upper, st,
49590b57cec5SDimitry Andric               next_task_bounds.get_lower_offset(),
49600b57cec5SDimitry Andric               next_task_bounds.get_upper_offset()));
49610b57cec5SDimitry Andric #if OMPT_SUPPORT
49620b57cec5SDimitry Andric     __kmp_omp_taskloop_task(NULL, gtid, next_task,
49630b57cec5SDimitry Andric                             codeptr_ra); // schedule new task
496481ad6265SDimitry Andric #if OMPT_OPTIONAL
496581ad6265SDimitry Andric     if (ompt_enabled.ompt_callback_dispatch) {
496681ad6265SDimitry Andric       OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
496781ad6265SDimitry Andric                               lower, upper, st);
496881ad6265SDimitry Andric     }
496981ad6265SDimitry Andric #endif // OMPT_OPTIONAL
49700b57cec5SDimitry Andric #else
49710b57cec5SDimitry Andric     __kmp_omp_task(gtid, next_task, true); // schedule new task
49720b57cec5SDimitry Andric #endif
49730b57cec5SDimitry Andric     lower = upper + st; // adjust lower bound for the next iteration
49740b57cec5SDimitry Andric   }
49750b57cec5SDimitry Andric   // free the pattern task and exit
49760b57cec5SDimitry Andric   __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
49770b57cec5SDimitry Andric   // do not execute the pattern task, just do internal bookkeeping
49780b57cec5SDimitry Andric   __kmp_task_finish<false>(gtid, task, current_task);
49790b57cec5SDimitry Andric }
49800b57cec5SDimitry Andric 
49810b57cec5SDimitry Andric // Structure to keep taskloop parameters for auxiliary task
49820b57cec5SDimitry Andric // kept in the shareds of the task structure.
49830b57cec5SDimitry Andric typedef struct __taskloop_params {
49840b57cec5SDimitry Andric   kmp_task_t *task;
49850b57cec5SDimitry Andric   kmp_uint64 *lb;
49860b57cec5SDimitry Andric   kmp_uint64 *ub;
49870b57cec5SDimitry Andric   void *task_dup;
49880b57cec5SDimitry Andric   kmp_int64 st;
49890b57cec5SDimitry Andric   kmp_uint64 ub_glob;
49900b57cec5SDimitry Andric   kmp_uint64 num_tasks;
49910b57cec5SDimitry Andric   kmp_uint64 grainsize;
49920b57cec5SDimitry Andric   kmp_uint64 extras;
4993e8d8bef9SDimitry Andric   kmp_int64 last_chunk;
49940b57cec5SDimitry Andric   kmp_uint64 tc;
49950b57cec5SDimitry Andric   kmp_uint64 num_t_min;
49960b57cec5SDimitry Andric #if OMPT_SUPPORT
49970b57cec5SDimitry Andric   void *codeptr_ra;
49980b57cec5SDimitry Andric #endif
49990b57cec5SDimitry Andric } __taskloop_params_t;
50000b57cec5SDimitry Andric 
50010b57cec5SDimitry Andric void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
50020b57cec5SDimitry Andric                           kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
5003e8d8bef9SDimitry Andric                           kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
5004e8d8bef9SDimitry Andric                           kmp_uint64,
50050b57cec5SDimitry Andric #if OMPT_SUPPORT
50060b57cec5SDimitry Andric                           void *,
50070b57cec5SDimitry Andric #endif
50080b57cec5SDimitry Andric                           void *);
50090b57cec5SDimitry Andric 
5010480093f4SDimitry Andric // Execute part of the taskloop submitted as a task.
50110b57cec5SDimitry Andric int __kmp_taskloop_task(int gtid, void *ptask) {
50120b57cec5SDimitry Andric   __taskloop_params_t *p =
50130b57cec5SDimitry Andric       (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
50140b57cec5SDimitry Andric   kmp_task_t *task = p->task;
50150b57cec5SDimitry Andric   kmp_uint64 *lb = p->lb;
50160b57cec5SDimitry Andric   kmp_uint64 *ub = p->ub;
50170b57cec5SDimitry Andric   void *task_dup = p->task_dup;
50180b57cec5SDimitry Andric   //  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
50190b57cec5SDimitry Andric   kmp_int64 st = p->st;
50200b57cec5SDimitry Andric   kmp_uint64 ub_glob = p->ub_glob;
50210b57cec5SDimitry Andric   kmp_uint64 num_tasks = p->num_tasks;
50220b57cec5SDimitry Andric   kmp_uint64 grainsize = p->grainsize;
50230b57cec5SDimitry Andric   kmp_uint64 extras = p->extras;
5024e8d8bef9SDimitry Andric   kmp_int64 last_chunk = p->last_chunk;
50250b57cec5SDimitry Andric   kmp_uint64 tc = p->tc;
50260b57cec5SDimitry Andric   kmp_uint64 num_t_min = p->num_t_min;
50270b57cec5SDimitry Andric #if OMPT_SUPPORT
50280b57cec5SDimitry Andric   void *codeptr_ra = p->codeptr_ra;
50290b57cec5SDimitry Andric #endif
50300b57cec5SDimitry Andric #if KMP_DEBUG
50310b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
50320b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task != NULL);
5033e8d8bef9SDimitry Andric   KA_TRACE(20,
5034e8d8bef9SDimitry Andric            ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
5035e8d8bef9SDimitry Andric             " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
5036e8d8bef9SDimitry Andric             gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
5037e8d8bef9SDimitry Andric             st, task_dup));
50380b57cec5SDimitry Andric #endif
50390b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
50400b57cec5SDimitry Andric   if (num_tasks > num_t_min)
50410b57cec5SDimitry Andric     __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
5042e8d8bef9SDimitry Andric                          grainsize, extras, last_chunk, tc, num_t_min,
50430b57cec5SDimitry Andric #if OMPT_SUPPORT
50440b57cec5SDimitry Andric                          codeptr_ra,
50450b57cec5SDimitry Andric #endif
50460b57cec5SDimitry Andric                          task_dup);
50470b57cec5SDimitry Andric   else
50480b57cec5SDimitry Andric     __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
5049e8d8bef9SDimitry Andric                           grainsize, extras, last_chunk, tc,
50500b57cec5SDimitry Andric #if OMPT_SUPPORT
50510b57cec5SDimitry Andric                           codeptr_ra,
50520b57cec5SDimitry Andric #endif
50530b57cec5SDimitry Andric                           task_dup);
50540b57cec5SDimitry Andric 
50550b57cec5SDimitry Andric   KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
50560b57cec5SDimitry Andric   return 0;
50570b57cec5SDimitry Andric }
50580b57cec5SDimitry Andric 
5059480093f4SDimitry Andric // Schedule part of the taskloop as a task,
5060480093f4SDimitry Andric // execute the rest of the taskloop.
50610b57cec5SDimitry Andric //
50620b57cec5SDimitry Andric // loc        Source location information
50630b57cec5SDimitry Andric // gtid       Global thread ID
50640b57cec5SDimitry Andric // task       Pattern task, exposes the loop iteration range
50650b57cec5SDimitry Andric // lb         Pointer to loop lower bound in task structure
50660b57cec5SDimitry Andric // ub         Pointer to loop upper bound in task structure
50670b57cec5SDimitry Andric // st         Loop stride
50680b57cec5SDimitry Andric // ub_glob    Global upper bound (used for lastprivate check)
50690b57cec5SDimitry Andric // num_tasks  Number of tasks to execute
50700b57cec5SDimitry Andric // grainsize  Number of loop iterations per task
50710b57cec5SDimitry Andric // extras     Number of chunks with grainsize+1 iterations
5072e8d8bef9SDimitry Andric // last_chunk Reduction of grainsize for last task
50730b57cec5SDimitry Andric // tc         Iterations count
50745ffd83dbSDimitry Andric // num_t_min  Threshold to launch tasks recursively
50750b57cec5SDimitry Andric // task_dup   Tasks duplication routine
50760b57cec5SDimitry Andric // codeptr_ra Return address for OMPT events
50770b57cec5SDimitry Andric void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
50780b57cec5SDimitry Andric                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
50790b57cec5SDimitry Andric                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
50800b57cec5SDimitry Andric                           kmp_uint64 grainsize, kmp_uint64 extras,
5081e8d8bef9SDimitry Andric                           kmp_int64 last_chunk, kmp_uint64 tc,
5082e8d8bef9SDimitry Andric                           kmp_uint64 num_t_min,
50830b57cec5SDimitry Andric #if OMPT_SUPPORT
50840b57cec5SDimitry Andric                           void *codeptr_ra,
50850b57cec5SDimitry Andric #endif
50860b57cec5SDimitry Andric                           void *task_dup) {
50870b57cec5SDimitry Andric   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
50880b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(task != NULL);
50890b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_tasks > num_t_min);
5090e8d8bef9SDimitry Andric   KA_TRACE(20,
5091e8d8bef9SDimitry Andric            ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
5092e8d8bef9SDimitry Andric             " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
5093e8d8bef9SDimitry Andric             gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
5094e8d8bef9SDimitry Andric             st, task_dup));
50950b57cec5SDimitry Andric   p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
50960b57cec5SDimitry Andric   kmp_uint64 lower = *lb;
50970b57cec5SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
50980b57cec5SDimitry Andric   //  kmp_taskdata_t *current_task = thread->th.th_current_task;
50990b57cec5SDimitry Andric   kmp_task_t *next_task;
51000b57cec5SDimitry Andric   size_t lower_offset =
51010b57cec5SDimitry Andric       (char *)lb - (char *)task; // remember offset of lb in the task structure
51020b57cec5SDimitry Andric   size_t upper_offset =
51030b57cec5SDimitry Andric       (char *)ub - (char *)task; // remember offset of ub in the task structure
51040b57cec5SDimitry Andric 
5105fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
5106fe6060f1SDimitry Andric                              (last_chunk < 0 ? last_chunk : extras));
51070b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_tasks > extras);
51080b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_tasks > 0);
51090b57cec5SDimitry Andric 
51100b57cec5SDimitry Andric   // split the loop in two halves
51110b57cec5SDimitry Andric   kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
5112e8d8bef9SDimitry Andric   kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
51130b57cec5SDimitry Andric   kmp_uint64 gr_size0 = grainsize;
51140b57cec5SDimitry Andric   kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
51150b57cec5SDimitry Andric   kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
5116e8d8bef9SDimitry Andric   if (last_chunk < 0) {
5117e8d8bef9SDimitry Andric     ext0 = ext1 = 0;
5118e8d8bef9SDimitry Andric     last_chunk1 = last_chunk;
5119e8d8bef9SDimitry Andric     tc0 = grainsize * n_tsk0;
5120e8d8bef9SDimitry Andric     tc1 = tc - tc0;
5121e8d8bef9SDimitry Andric   } else if (n_tsk0 <= extras) {
51220b57cec5SDimitry Andric     gr_size0++; // integrate extras into grainsize
51230b57cec5SDimitry Andric     ext0 = 0; // no extra iters in 1st half
51240b57cec5SDimitry Andric     ext1 = extras - n_tsk0; // remaining extras
51250b57cec5SDimitry Andric     tc0 = gr_size0 * n_tsk0;
51260b57cec5SDimitry Andric     tc1 = tc - tc0;
51270b57cec5SDimitry Andric   } else { // n_tsk0 > extras
51280b57cec5SDimitry Andric     ext1 = 0; // no extra iters in 2nd half
51290b57cec5SDimitry Andric     ext0 = extras;
51300b57cec5SDimitry Andric     tc1 = grainsize * n_tsk1;
51310b57cec5SDimitry Andric     tc0 = tc - tc1;
51320b57cec5SDimitry Andric   }
51330b57cec5SDimitry Andric   ub0 = lower + st * (tc0 - 1);
51340b57cec5SDimitry Andric   lb1 = ub0 + st;
51350b57cec5SDimitry Andric 
51360b57cec5SDimitry Andric   // create pattern task for 2nd half of the loop
513706c3fb27SDimitry Andric #if OMPX_TASKGRAPH
513806c3fb27SDimitry Andric   next_task = __kmp_task_dup_alloc(thread, task,
513906c3fb27SDimitry Andric                                    /* taskloop_recur */ 1);
514006c3fb27SDimitry Andric #else
51410b57cec5SDimitry Andric   next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
514206c3fb27SDimitry Andric #endif
51430b57cec5SDimitry Andric   // adjust lower bound (upper bound is not changed) for the 2nd half
51440b57cec5SDimitry Andric   *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
5145480093f4SDimitry Andric   if (ptask_dup != NULL) // construct firstprivates, etc.
51460b57cec5SDimitry Andric     ptask_dup(next_task, task, 0);
51470b57cec5SDimitry Andric   *ub = ub0; // adjust upper bound for the 1st half
51480b57cec5SDimitry Andric 
51490b57cec5SDimitry Andric   // create auxiliary task for 2nd half of the loop
51505ffd83dbSDimitry Andric   // make sure new task has same parent task as the pattern task
51515ffd83dbSDimitry Andric   kmp_taskdata_t *current_task = thread->th.th_current_task;
51525ffd83dbSDimitry Andric   thread->th.th_current_task = taskdata->td_parent;
51530b57cec5SDimitry Andric   kmp_task_t *new_task =
51540b57cec5SDimitry Andric       __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
51550b57cec5SDimitry Andric                             sizeof(__taskloop_params_t), &__kmp_taskloop_task);
51565ffd83dbSDimitry Andric   // restore current task
51575ffd83dbSDimitry Andric   thread->th.th_current_task = current_task;
51580b57cec5SDimitry Andric   __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
51590b57cec5SDimitry Andric   p->task = next_task;
51600b57cec5SDimitry Andric   p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
51610b57cec5SDimitry Andric   p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
51620b57cec5SDimitry Andric   p->task_dup = task_dup;
51630b57cec5SDimitry Andric   p->st = st;
51640b57cec5SDimitry Andric   p->ub_glob = ub_glob;
51650b57cec5SDimitry Andric   p->num_tasks = n_tsk1;
51660b57cec5SDimitry Andric   p->grainsize = grainsize;
51670b57cec5SDimitry Andric   p->extras = ext1;
5168e8d8bef9SDimitry Andric   p->last_chunk = last_chunk1;
51690b57cec5SDimitry Andric   p->tc = tc1;
51700b57cec5SDimitry Andric   p->num_t_min = num_t_min;
51710b57cec5SDimitry Andric #if OMPT_SUPPORT
51720b57cec5SDimitry Andric   p->codeptr_ra = codeptr_ra;
51730b57cec5SDimitry Andric #endif
51740b57cec5SDimitry Andric 
517506c3fb27SDimitry Andric #if OMPX_TASKGRAPH
517606c3fb27SDimitry Andric   kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
517706c3fb27SDimitry Andric   new_task_data->tdg = taskdata->tdg;
517806c3fb27SDimitry Andric   new_task_data->is_taskgraph = 0;
517906c3fb27SDimitry Andric #endif
518006c3fb27SDimitry Andric 
51810b57cec5SDimitry Andric #if OMPT_SUPPORT
51820b57cec5SDimitry Andric   // schedule new task with correct return address for OMPT events
51830b57cec5SDimitry Andric   __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
51840b57cec5SDimitry Andric #else
51850b57cec5SDimitry Andric   __kmp_omp_task(gtid, new_task, true); // schedule new task
51860b57cec5SDimitry Andric #endif
51870b57cec5SDimitry Andric 
51880b57cec5SDimitry Andric   // execute the 1st half of current subrange
51890b57cec5SDimitry Andric   if (n_tsk0 > num_t_min)
51900b57cec5SDimitry Andric     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
5191e8d8bef9SDimitry Andric                          ext0, last_chunk0, tc0, num_t_min,
51920b57cec5SDimitry Andric #if OMPT_SUPPORT
51930b57cec5SDimitry Andric                          codeptr_ra,
51940b57cec5SDimitry Andric #endif
51950b57cec5SDimitry Andric                          task_dup);
51960b57cec5SDimitry Andric   else
51970b57cec5SDimitry Andric     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
5198e8d8bef9SDimitry Andric                           gr_size0, ext0, last_chunk0, tc0,
51990b57cec5SDimitry Andric #if OMPT_SUPPORT
52000b57cec5SDimitry Andric                           codeptr_ra,
52010b57cec5SDimitry Andric #endif
52020b57cec5SDimitry Andric                           task_dup);
52030b57cec5SDimitry Andric 
5204e8d8bef9SDimitry Andric   KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
52050b57cec5SDimitry Andric }
52060b57cec5SDimitry Andric 
// __kmp_taskloop: common implementation of the taskloop construct, backing
// both __kmpc_taskloop and __kmpc_taskloop_5.
// loc       Source location information
// gtid      Global thread ID
// task      Pattern task used to stamp out the per-chunk tasks
// if_val    Value of the if clause; 0 forces serial, linear execution
// lb        Pointer to loop lower bound in task structure
// ub        Pointer to loop upper bound in task structure
// st        Loop stride
// nogroup   1 if nogroup clause specified (no implicit taskgroup), 0 otherwise
// sched     Schedule specified: 0/1/2 for none/grainsize/num_tasks
// grainsize Schedule value if specified
// modifier  1 if the 'strict' modifier was present on the schedule, 0 otherwise
// task_dup  Task duplication routine
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int nogroup, int sched, kmp_uint64 grainsize,
                           int modifier, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  // Without a nogroup clause the taskloop is wrapped in an implicit taskgroup,
  // closed by the matching __kmpc_end_taskgroup at the bottom of this function.
  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

#if OMPX_TASKGRAPH
  // The pattern task consumed a TDG task id; give it back since the pattern
  // task itself is not executed as a graph node.
  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
#endif
  // =========================================================================
  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk =
      0; // reduce grainsize of last task by last_chunk in strict mode
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    // TODO: can we choose better default heuristic?
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size*10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else {
      // In strict mode every task gets exactly 'grainsize' iterations except
      // the last, which is shortened by |last_chunk| (last_chunk <= 0).
      if (modifier) {
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else {
        num_tasks = tc / grainsize;
        // adjust grainsize for balanced distribution of iterations
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  // Invariant: trip count is fully covered by num_tasks chunks plus either the
  // 'extras' remainder (balanced mode) or the negative last_chunk (strict).
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  // =========================================================================

  // check if clause value first
  // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
  if (if_val == 0) { // if(0) specified, mark task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}
5374e8d8bef9SDimitry Andric 
5375e8d8bef9SDimitry Andric /*!
5376e8d8bef9SDimitry Andric @ingroup TASKING
5377e8d8bef9SDimitry Andric @param loc       Source location information
5378e8d8bef9SDimitry Andric @param gtid      Global thread ID
5379e8d8bef9SDimitry Andric @param task      Task structure
5380e8d8bef9SDimitry Andric @param if_val    Value of the if clause
5381e8d8bef9SDimitry Andric @param lb        Pointer to loop lower bound in task structure
5382e8d8bef9SDimitry Andric @param ub        Pointer to loop upper bound in task structure
5383e8d8bef9SDimitry Andric @param st        Loop stride
5384e8d8bef9SDimitry Andric @param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
5385e8d8bef9SDimitry Andric @param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
5386e8d8bef9SDimitry Andric @param grainsize Schedule value if specified
5387e8d8bef9SDimitry Andric @param task_dup  Tasks duplication routine
5388e8d8bef9SDimitry Andric 
5389e8d8bef9SDimitry Andric Execute the taskloop construct.
5390e8d8bef9SDimitry Andric */
5391e8d8bef9SDimitry Andric void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5392e8d8bef9SDimitry Andric                      kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
5393e8d8bef9SDimitry Andric                      int sched, kmp_uint64 grainsize, void *task_dup) {
5394e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
5395e8d8bef9SDimitry Andric   KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
5396e8d8bef9SDimitry Andric   __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5397e8d8bef9SDimitry Andric                  0, task_dup);
53980b57cec5SDimitry Andric   KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
53990b57cec5SDimitry Andric }
5400e8d8bef9SDimitry Andric 
5401e8d8bef9SDimitry Andric /*!
5402e8d8bef9SDimitry Andric @ingroup TASKING
5403e8d8bef9SDimitry Andric @param loc       Source location information
5404e8d8bef9SDimitry Andric @param gtid      Global thread ID
5405e8d8bef9SDimitry Andric @param task      Task structure
5406e8d8bef9SDimitry Andric @param if_val    Value of the if clause
5407e8d8bef9SDimitry Andric @param lb        Pointer to loop lower bound in task structure
5408e8d8bef9SDimitry Andric @param ub        Pointer to loop upper bound in task structure
5409e8d8bef9SDimitry Andric @param st        Loop stride
5410e8d8bef9SDimitry Andric @param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
5411e8d8bef9SDimitry Andric @param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
5412e8d8bef9SDimitry Andric @param grainsize Schedule value if specified
541381ad6265SDimitry Andric @param modifier  Modifier 'strict' for sched, 1 if present, 0 otherwise
5414e8d8bef9SDimitry Andric @param task_dup  Tasks duplication routine
5415e8d8bef9SDimitry Andric 
5416e8d8bef9SDimitry Andric Execute the taskloop construct.
5417e8d8bef9SDimitry Andric */
5418e8d8bef9SDimitry Andric void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5419e8d8bef9SDimitry Andric                        kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
5420e8d8bef9SDimitry Andric                        int nogroup, int sched, kmp_uint64 grainsize,
5421e8d8bef9SDimitry Andric                        int modifier, void *task_dup) {
5422e8d8bef9SDimitry Andric   __kmp_assert_valid_gtid(gtid);
5423e8d8bef9SDimitry Andric   KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
5424e8d8bef9SDimitry Andric   __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5425e8d8bef9SDimitry Andric                  modifier, task_dup);
5426e8d8bef9SDimitry Andric   KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
5427e8d8bef9SDimitry Andric }
5428bdd1243dSDimitry Andric 
5429bdd1243dSDimitry Andric /*!
5430bdd1243dSDimitry Andric @ingroup TASKING
5431bdd1243dSDimitry Andric @param gtid Global Thread ID of current thread
5432bdd1243dSDimitry Andric @return Returns a pointer to the thread's current task async handle. If no task
5433bdd1243dSDimitry Andric is present or gtid is invalid, returns NULL.
5434bdd1243dSDimitry Andric 
Acquires a pointer to the target async handle from the current task.
5436bdd1243dSDimitry Andric */
5437bdd1243dSDimitry Andric void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
5438bdd1243dSDimitry Andric   if (gtid == KMP_GTID_DNE)
5439bdd1243dSDimitry Andric     return NULL;
5440bdd1243dSDimitry Andric 
5441bdd1243dSDimitry Andric   kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5442bdd1243dSDimitry Andric   kmp_taskdata_t *taskdata = thread->th.th_current_task;
5443bdd1243dSDimitry Andric 
5444bdd1243dSDimitry Andric   if (!taskdata)
5445bdd1243dSDimitry Andric     return NULL;
5446bdd1243dSDimitry Andric 
5447bdd1243dSDimitry Andric   return &taskdata->td_target_data.async_handle;
5448bdd1243dSDimitry Andric }
5449bdd1243dSDimitry Andric 
5450bdd1243dSDimitry Andric /*!
5451bdd1243dSDimitry Andric @ingroup TASKING
5452bdd1243dSDimitry Andric @param gtid Global Thread ID of current thread
5453bdd1243dSDimitry Andric @return Returns TRUE if the current task being executed of the given thread has
5454bdd1243dSDimitry Andric a task team allocated to it. Otherwise, returns FALSE.
5455bdd1243dSDimitry Andric 
5456bdd1243dSDimitry Andric Checks if the current thread has a task team.
5457bdd1243dSDimitry Andric */
5458bdd1243dSDimitry Andric bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
5459bdd1243dSDimitry Andric   if (gtid == KMP_GTID_DNE)
5460bdd1243dSDimitry Andric     return FALSE;
5461bdd1243dSDimitry Andric 
5462bdd1243dSDimitry Andric   kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5463bdd1243dSDimitry Andric   kmp_taskdata_t *taskdata = thread->th.th_current_task;
5464bdd1243dSDimitry Andric 
5465bdd1243dSDimitry Andric   if (!taskdata)
5466bdd1243dSDimitry Andric     return FALSE;
5467bdd1243dSDimitry Andric 
5468bdd1243dSDimitry Andric   return taskdata->td_task_team != NULL;
5469bdd1243dSDimitry Andric }
547006c3fb27SDimitry Andric 
547106c3fb27SDimitry Andric #if OMPX_TASKGRAPH
547206c3fb27SDimitry Andric // __kmp_find_tdg: identify a TDG through its ID
547306c3fb27SDimitry Andric // gtid:   Global Thread ID
547406c3fb27SDimitry Andric // tdg_id: ID of the TDG
// returns: If a TDG corresponding to this ID is found and is not
// in its initial state, return the pointer to it; otherwise nullptr
547706c3fb27SDimitry Andric static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
547806c3fb27SDimitry Andric   kmp_tdg_info_t *res = nullptr;
547906c3fb27SDimitry Andric   if (__kmp_max_tdgs == 0)
548006c3fb27SDimitry Andric     return res;
548106c3fb27SDimitry Andric 
548206c3fb27SDimitry Andric   if (__kmp_global_tdgs == NULL)
548306c3fb27SDimitry Andric     __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
548406c3fb27SDimitry Andric         sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
548506c3fb27SDimitry Andric 
548606c3fb27SDimitry Andric   if ((__kmp_global_tdgs[tdg_id]) &&
548706c3fb27SDimitry Andric       (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
548806c3fb27SDimitry Andric     res = __kmp_global_tdgs[tdg_id];
548906c3fb27SDimitry Andric   return res;
549006c3fb27SDimitry Andric }
549106c3fb27SDimitry Andric 
549206c3fb27SDimitry Andric // __kmp_print_tdg_dot: prints the TDG to a dot file
// tdg:    Pointer to the TDG to print
549406c3fb27SDimitry Andric void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg) {
549506c3fb27SDimitry Andric   kmp_int32 tdg_id = tdg->tdg_id;
549606c3fb27SDimitry Andric   KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id));
549706c3fb27SDimitry Andric 
549806c3fb27SDimitry Andric   char file_name[20];
549906c3fb27SDimitry Andric   sprintf(file_name, "tdg_%d.dot", tdg_id);
550006c3fb27SDimitry Andric   kmp_safe_raii_file_t tdg_file(file_name, "w");
550106c3fb27SDimitry Andric 
550206c3fb27SDimitry Andric   kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
550306c3fb27SDimitry Andric   fprintf(tdg_file,
550406c3fb27SDimitry Andric           "digraph TDG {\n"
550506c3fb27SDimitry Andric           "   compound=true\n"
550606c3fb27SDimitry Andric           "   subgraph cluster {\n"
550706c3fb27SDimitry Andric           "      label=TDG_%d\n",
550806c3fb27SDimitry Andric           tdg_id);
550906c3fb27SDimitry Andric   for (kmp_int32 i = 0; i < num_tasks; i++) {
551006c3fb27SDimitry Andric     fprintf(tdg_file, "      %d[style=bold]\n", i);
551106c3fb27SDimitry Andric   }
551206c3fb27SDimitry Andric   fprintf(tdg_file, "   }\n");
551306c3fb27SDimitry Andric   for (kmp_int32 i = 0; i < num_tasks; i++) {
551406c3fb27SDimitry Andric     kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
551506c3fb27SDimitry Andric     kmp_int32 *successors = tdg->record_map[i].successors;
551606c3fb27SDimitry Andric     if (nsuccessors > 0) {
551706c3fb27SDimitry Andric       for (kmp_int32 j = 0; j < nsuccessors; j++)
551806c3fb27SDimitry Andric         fprintf(tdg_file, "   %d -> %d \n", i, successors[j]);
551906c3fb27SDimitry Andric     }
552006c3fb27SDimitry Andric   }
552106c3fb27SDimitry Andric   fprintf(tdg_file, "}");
552206c3fb27SDimitry Andric   KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
552306c3fb27SDimitry Andric }
552406c3fb27SDimitry Andric 
// __kmp_exec_tdg: launch the execution of a previously
// recorded TDG
// gtid:   Global Thread ID
// tdg:    Pointer to the TDG to execute
void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
  KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 *this_root_tasks = tdg->root_tasks;
  kmp_int32 this_num_roots = tdg->num_roots;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *parent_task = thread->th.th_current_task;

  // Re-initialize any task reductions recorded with this graph before replay.
  if (tdg->rec_taskred_data) {
    __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
  }

  // Re-parent every recorded task under the current task, since the graph may
  // be replayed from a different task context than the one it was recorded in.
  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);

    td->td_parent = parent_task;
    this_record_map[j].parent_task = parent_task;

    kmp_taskgroup_t *parent_taskgroup =
        this_record_map[j].parent_task->td_taskgroup;

    // Arm the dependence counter and account the task as an incomplete child
    // of the new parent.
    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
                      this_record_map[j].npredecessors);
    KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);

    if (parent_taskgroup) {
      KMP_ATOMIC_INC(&parent_taskgroup->count);
      // The taskgroup is different so we must update it
      td->td_taskgroup = parent_taskgroup;
    } else if (td->td_taskgroup != nullptr) {
      // If the parent doesn't have a taskgroup, remove it from the task
      td->td_taskgroup = nullptr;
    }
    if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
  }

  // Kick off the roots; non-root tasks are released as their predecessor
  // counters drain.
  for (kmp_int32 j = 0; j < this_num_roots; ++j) {
    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
  }
  KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
}
557606c3fb27SDimitry Andric 
557706c3fb27SDimitry Andric // __kmp_start_record: set up a TDG structure and turn the
557806c3fb27SDimitry Andric // recording flag to true
557906c3fb27SDimitry Andric // gtid:        Global Thread ID of the encountering thread
558006c3fb27SDimitry Andric // input_flags: Flags associated with the TDG
558106c3fb27SDimitry Andric // tdg_id:      ID of the TDG to record
558206c3fb27SDimitry Andric static inline void __kmp_start_record(kmp_int32 gtid,
558306c3fb27SDimitry Andric                                       kmp_taskgraph_flags_t *flags,
558406c3fb27SDimitry Andric                                       kmp_int32 tdg_id) {
558506c3fb27SDimitry Andric   kmp_tdg_info_t *tdg =
558606c3fb27SDimitry Andric       (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
558706c3fb27SDimitry Andric   __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
558806c3fb27SDimitry Andric   // Initializing the TDG structure
558906c3fb27SDimitry Andric   tdg->tdg_id = tdg_id;
559006c3fb27SDimitry Andric   tdg->map_size = INIT_MAPSIZE;
559106c3fb27SDimitry Andric   tdg->num_roots = -1;
559206c3fb27SDimitry Andric   tdg->root_tasks = nullptr;
559306c3fb27SDimitry Andric   tdg->tdg_status = KMP_TDG_RECORDING;
559406c3fb27SDimitry Andric   tdg->rec_num_taskred = 0;
559506c3fb27SDimitry Andric   tdg->rec_taskred_data = nullptr;
559606c3fb27SDimitry Andric   KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);
559706c3fb27SDimitry Andric 
559806c3fb27SDimitry Andric   // Initializing the list of nodes in this TDG
559906c3fb27SDimitry Andric   kmp_node_info_t *this_record_map =
560006c3fb27SDimitry Andric       (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
560106c3fb27SDimitry Andric   for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
560206c3fb27SDimitry Andric     kmp_int32 *successorsList =
560306c3fb27SDimitry Andric         (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
560406c3fb27SDimitry Andric     this_record_map[i].task = nullptr;
560506c3fb27SDimitry Andric     this_record_map[i].successors = successorsList;
560606c3fb27SDimitry Andric     this_record_map[i].nsuccessors = 0;
560706c3fb27SDimitry Andric     this_record_map[i].npredecessors = 0;
560806c3fb27SDimitry Andric     this_record_map[i].successors_size = __kmp_successors_size;
560906c3fb27SDimitry Andric     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
561006c3fb27SDimitry Andric   }
561106c3fb27SDimitry Andric 
561206c3fb27SDimitry Andric   __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
561306c3fb27SDimitry Andric }
561406c3fb27SDimitry Andric 
561506c3fb27SDimitry Andric // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
561606c3fb27SDimitry Andric // the beginning of the record process of a task region
561706c3fb27SDimitry Andric // loc_ref:     Location of TDG, not used yet
561806c3fb27SDimitry Andric // gtid:        Global Thread ID of the encountering thread
561906c3fb27SDimitry Andric // input_flags: Flags associated with the TDG
562006c3fb27SDimitry Andric // tdg_id:      ID of the TDG to record, for now, incremental integer
562106c3fb27SDimitry Andric // returns:     1 if we record, otherwise, 0
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
                                   kmp_int32 input_flags, kmp_int32 tdg_id) {

  kmp_int32 res;
  kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
  KA_TRACE(10,
           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
            gtid, loc_ref, input_flags, tdg_id));

  // Taskgraph support disabled: report "record" so the region body runs
  // normally, but do not open a taskgroup (the matching
  // __kmpc_end_record_task also skips the taskgroup end in this case).
  if (__kmp_max_tdgs == 0) {
    KA_TRACE(
        10,
        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
         "__kmp_max_tdgs = 0\n",
         gtid, loc_ref, input_flags, tdg_id));
    return 1;
  }

  // The region is bracketed by a taskgroup so that all graph tasks complete
  // before __kmpc_end_record_task returns.
  __kmpc_taskgroup(loc_ref, gtid);
  if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
    // Graph already recorded: replay it instead of re-recording.
    // TODO: use re_record flag
    __kmp_exec_tdg(gtid, tdg);
    res = 0;
  } else {
    // First encounter of this ID: set up recording state for the region body.
    __kmp_curr_tdg_idx = tdg_id;
    KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
    __kmp_start_record(gtid, flags, tdg_id);
    __kmp_num_tdg++;
    res = 1;
  }
  KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
                gtid, tdg_id, res ? "record" : "execute"));
  return res;
}
565606c3fb27SDimitry Andric 
565706c3fb27SDimitry Andric // __kmp_end_record: set up a TDG after recording it
565806c3fb27SDimitry Andric // gtid:   Global thread ID
565906c3fb27SDimitry Andric // tdg:    Pointer to the TDG
566006c3fb27SDimitry Andric void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
566106c3fb27SDimitry Andric   // Store roots
566206c3fb27SDimitry Andric   kmp_node_info_t *this_record_map = tdg->record_map;
566306c3fb27SDimitry Andric   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
566406c3fb27SDimitry Andric   kmp_int32 *this_root_tasks =
566506c3fb27SDimitry Andric       (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
566606c3fb27SDimitry Andric   kmp_int32 this_map_size = tdg->map_size;
566706c3fb27SDimitry Andric   kmp_int32 this_num_roots = 0;
566806c3fb27SDimitry Andric   kmp_info_t *thread = __kmp_threads[gtid];
566906c3fb27SDimitry Andric 
567006c3fb27SDimitry Andric   for (kmp_int32 i = 0; i < this_num_tasks; i++) {
567106c3fb27SDimitry Andric     if (this_record_map[i].npredecessors == 0) {
567206c3fb27SDimitry Andric       this_root_tasks[this_num_roots++] = i;
567306c3fb27SDimitry Andric     }
567406c3fb27SDimitry Andric   }
567506c3fb27SDimitry Andric 
567606c3fb27SDimitry Andric   // Update with roots info and mapsize
567706c3fb27SDimitry Andric   tdg->map_size = this_map_size;
567806c3fb27SDimitry Andric   tdg->num_roots = this_num_roots;
567906c3fb27SDimitry Andric   tdg->root_tasks = this_root_tasks;
568006c3fb27SDimitry Andric   KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
568106c3fb27SDimitry Andric   tdg->tdg_status = KMP_TDG_READY;
568206c3fb27SDimitry Andric 
568306c3fb27SDimitry Andric   if (thread->th.th_current_task->td_dephash) {
568406c3fb27SDimitry Andric     __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
568506c3fb27SDimitry Andric     thread->th.th_current_task->td_dephash = NULL;
568606c3fb27SDimitry Andric   }
568706c3fb27SDimitry Andric 
568806c3fb27SDimitry Andric   // Reset predecessor counter
568906c3fb27SDimitry Andric   for (kmp_int32 i = 0; i < this_num_tasks; i++) {
569006c3fb27SDimitry Andric     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
569106c3fb27SDimitry Andric                       this_record_map[i].npredecessors);
569206c3fb27SDimitry Andric   }
569306c3fb27SDimitry Andric   KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
569406c3fb27SDimitry Andric 
569506c3fb27SDimitry Andric   if (__kmp_tdg_dot)
569606c3fb27SDimitry Andric     __kmp_print_tdg_dot(tdg);
569706c3fb27SDimitry Andric }
569806c3fb27SDimitry Andric 
569906c3fb27SDimitry Andric // __kmpc_end_record_task: wrapper around __kmp_end_record to mark
570006c3fb27SDimitry Andric // the end of recording phase
570106c3fb27SDimitry Andric //
570206c3fb27SDimitry Andric // loc_ref:      Source location information
570306c3fb27SDimitry Andric // gtid:         Global thread ID
570406c3fb27SDimitry Andric // input_flags:  Flags attached to the graph
570506c3fb27SDimitry Andric // tdg_id:       ID of the TDG just finished recording
570606c3fb27SDimitry Andric void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
570706c3fb27SDimitry Andric                             kmp_int32 input_flags, kmp_int32 tdg_id) {
570806c3fb27SDimitry Andric   kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);
570906c3fb27SDimitry Andric 
571006c3fb27SDimitry Andric   KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
571106c3fb27SDimitry Andric                 " tdg=%d with flags=%d\n",
571206c3fb27SDimitry Andric                 gtid, loc_ref, tdg_id, input_flags));
571306c3fb27SDimitry Andric   if (__kmp_max_tdgs) {
571406c3fb27SDimitry Andric     // TODO: use input_flags->nowait
571506c3fb27SDimitry Andric     __kmpc_end_taskgroup(loc_ref, gtid);
571606c3fb27SDimitry Andric     if (__kmp_tdg_is_recording(tdg->tdg_status))
571706c3fb27SDimitry Andric       __kmp_end_record(gtid, tdg);
571806c3fb27SDimitry Andric   }
571906c3fb27SDimitry Andric   KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
572006c3fb27SDimitry Andric                 " tdg=%d, its status is now READY\n",
572106c3fb27SDimitry Andric                 gtid, loc_ref, tdg_id));
572206c3fb27SDimitry Andric }
572306c3fb27SDimitry Andric #endif
5724