/*
 * kmp_taskdeps.cpp
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// TODO: Improve memory allocation? keep a list of pre-allocated structures?
// allocate in blocks? re-use finished list entries?
// TODO: don't use atomic ref counters for stack-allocated nodes.
// TODO: find an alternative to atomic refs for heap-allocated nodes?
// TODO: Finish graph output support
// TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other
// runtime locks
// TODO: Any ITT support needed?
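
// Summary note: dependences are tracked per encountering task in a hash table
// (kmp_dephash_t, reachable through td_dephash) that maps each depend address
// to a kmp_dephash_entry_t recording the last "out" node (last_out), the
// current and previous sets of in/inoutset nodes (last_set/prev_set) and an
// optional lock for mutexinoutset. Every task with dependences gets a
// reference-counted kmp_depnode_t; __kmp_check_deps links that node into the
// successor lists of its predecessors, and the task is not queued until its
// npredecessors count drops to zero.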

#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = 0;
#endif

static void __kmp_init_node(kmp_depnode_t *node) {
  node->dn.successors = NULL;
  node->dn.task = NULL; // will point to the right task
  // once dependences have been processed
  for (int i = 0; i < MAX_MTX_DEPS; ++i)
    node->dn.mtx_locks[i] = NULL;
  node->dn.mtx_num_locks = 0;
  __kmp_init_lock(&node->dn.lock);
  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  __itt_sync_create(node, "OMP task dep node", NULL, 0);
#endif
}

static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_ATOMIC_INC(&node->dn.nrefs);
  return node;
}

enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
const size_t MAX_GEN = 8;

static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
  // m_num_sets );
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}

static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
                                           kmp_dephash_t *current_dephash) {
  kmp_dephash_t *h;

  size_t gen = current_dephash->generation + 1;
  if (gen >= MAX_GEN)
    return current_dephash;
  size_t new_size = sizes[gen];

  size_t size_to_allocate =
      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
#endif

  h->size = new_size;
  h->nelements = current_dephash->nelements;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->generation = gen;
  h->nconflicts = 0;
  h->last_all = current_dephash->last_all;

  // make sure buckets are properly initialized
  for (size_t i = 0; i < new_size; i++) {
    h->buckets[i] = NULL;
  }

  // insert existing elements in the new table
  for (size_t i = 0; i < current_dephash->size; i++) {
    kmp_dephash_entry_t *next, *entry;
    for (entry = current_dephash->buckets[i]; entry; entry = next) {
      next = entry->next_in_bucket;
      // Compute the new hash using the new size, and insert the entry in
      // the new bucket.
      size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
      entry->next_in_bucket = h->buckets[new_bucket];
      if (entry->next_in_bucket) {
        h->nconflicts++;
      }
      h->buckets[new_bucket] = entry;
    }
  }

  // Free old hash table
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, current_dephash);
#else
  __kmp_thread_free(thread, current_dephash);
#endif

  return h;
}

static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                           kmp_taskdata_t *current_task) {
  kmp_dephash_t *h;

  size_t h_size;

  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
    h_size = KMP_DEPHASH_MASTER_SIZE;
  else
    h_size = KMP_DEPHASH_OTHER_SIZE;

  size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
  h->size = h_size;

  h->generation = 0;
  h->nelements = 0;
  h->nconflicts = 0;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->last_all = NULL;

  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;

  return h;
}

static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                             kmp_dephash_t **hash,
                                             kmp_intptr_t addr) {
  kmp_dephash_t *h = *hash;
  if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
    *hash = __kmp_dephash_extend(thread, h);
    h = *hash;
  }
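  // Note: __kmp_dephash_extend stops growing the table after MAX_GEN
  // generations and then returns it unchanged, so h may still refer to the
  // original (over-full) table here; the lookup below works either way.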
  size_t bucket = __kmp_dephash_hash(addr, h->size);

  kmp_dephash_entry_t *entry;
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->addr == addr)
      break;

  if (entry == NULL) {
    // create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
        thread, sizeof(kmp_dephash_entry_t));
#else
    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_dephash_entry_t));
#endif
    entry->addr = addr;
    if (!h->last_all) // no predecessor task with omp_all_memory dependence
      entry->last_out = NULL;
    else // else link the omp_all_memory depnode to the new entry
      entry->last_out = __kmp_node_ref(h->last_all);
    entry->last_set = NULL;
    entry->prev_set = NULL;
    entry->last_flag = 0;
    entry->mtx_lock = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    h->buckets[bucket] = entry;
    h->nelements++;
    if (entry->next_in_bucket)
      h->nconflicts++;
  }
  return entry;
}

static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
                                          kmp_depnode_list_t *list,
                                          kmp_depnode_t *node) {
  kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
      thread, sizeof(kmp_depnode_list_t));
#else
  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
      thread, sizeof(kmp_depnode_list_t));
#endif

  new_head->node = __kmp_node_ref(node);
  new_head->next = list;

  return new_head;
}

static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#if OMPX_TASKGRAPH
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
  if (source->dn.task && sink_task) {
    // Dependences between a task that is inside the TDG and a task that is
    // not are not supported
    KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
  }
  if (task_sink->is_taskgraph &&
      __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
    kmp_node_info_t *source_info =
        &task_sink->tdg->record_map[task_source->td_task_id];
    bool exists = false;
    for (int i = 0; i < source_info->nsuccessors; i++) {
      if (source_info->successors[i] == task_sink->td_task_id) {
        exists = true;
        break;
      }
    }
    if (!exists) {
      if (source_info->nsuccessors >= source_info->successors_size) {
        source_info->successors_size = 2 * source_info->successors_size;
        kmp_int32 *old_succ_ids = source_info->successors;
        kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
            source_info->successors_size * sizeof(kmp_int32));
        source_info->successors = new_succ_ids;
        __kmp_free(old_succ_ids);
      }

      source_info->successors[source_info->nsuccessors] = task_sink->td_task_id;
      source_info->nsuccessors++;

      kmp_node_info_t *sink_info =
          &(task_sink->tdg->record_map[task_sink->td_task_id]);
      sink_info->npredecessors++;
    }
  }
#endif
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task as that is only filled after the dependences
  // are already processed!
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between tasks (a=source, b=sink) in which
     task a blocks the execution of b through the ompt_new_dependence_callback
     */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    ompt_data_t *sink_data;
    if (sink_task)
      sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
    else
      sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data), sink_data);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}

kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  return td->td_depnode ? &(td->td_depnode->dn) : NULL;
}

kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  return td->td_depnode->dn.successors;
}

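// Note: the two __kmp_depnode_link_successor overloads below add the given
// depnode as a successor of, respectively, every node in a predecessor list
// or a single predecessor node, and return how many predecessor edges were
// actually created so callers can accumulate the npredecessors count.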
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                             kmp_task_t *task, kmp_depnode_t *node,
                             kmp_depnode_list_t *plist) {
  if (!plist)
    return 0;
  kmp_int32 npredecessors = 0;
  // link node as successor of list elements
  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
    kmp_depnode_t *dep = p->node;
#if OMPX_TASKGRAPH
    kmp_tdg_status tdg_status = KMP_TDG_NONE;
    if (task) {
      kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
      if (td->is_taskgraph)
        tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
      if (__kmp_tdg_is_recording(tdg_status))
        __kmp_track_dependence(gtid, dep, node, task);
    }
#endif
    if (dep->dn.task) {
      KMP_ACQUIRE_DEPNODE(gtid, dep);
      if (dep->dn.task) {
        if (!dep->dn.successors || dep->dn.successors->node != node) {
#if OMPX_TASKGRAPH
          if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
            __kmp_track_dependence(gtid, dep, node, task);
          dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
          KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                        "%p\n",
                        gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
                        KMP_TASK_TO_TASKDATA(task)));
          npredecessors++;
        }
      }
      KMP_RELEASE_DEPNODE(gtid, dep);
    }
  }
  return npredecessors;
}

// Add the edge 'sink' -> 'source' in the task dependency graph
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                     kmp_info_t *thread,
                                                     kmp_task_t *task,
                                                     kmp_depnode_t *source,
                                                     kmp_depnode_t *sink) {
  if (!sink)
    return 0;
  kmp_int32 npredecessors = 0;
#if OMPX_TASKGRAPH
  kmp_tdg_status tdg_status = KMP_TDG_NONE;
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  if (task) {
    if (td->is_taskgraph)
      tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
    if (__kmp_tdg_is_recording(tdg_status) && sink->dn.task)
      __kmp_track_dependence(gtid, sink, source, task);
  }
#endif
  if (sink->dn.task) {
    // synchronously add source to sink's list of successors
    KMP_ACQUIRE_DEPNODE(gtid, sink);
    if (sink->dn.task) {
      if (!sink->dn.successors || sink->dn.successors->node != source) {
#if OMPX_TASKGRAPH
        if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
          __kmp_track_dependence(gtid, sink, source, task);
        sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                      "%p\n",
                      gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                      KMP_TASK_TO_TASKDATA(task)));
#if OMPX_TASKGRAPH
        if (__kmp_tdg_is_recording(tdg_status)) {
          kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
          if (tdd->is_taskgraph) {
            if (tdd->td_flags.onced)
              // decrement npredecessors if sink->dn.task belongs to a taskgraph
              // and
              // 1) the task is reset to its initial state (by kmp_free_task) or
              // 2) the task is complete but not yet reset
              npredecessors--;
          }
        }
#endif
        npredecessors++;
      }
    }
    KMP_RELEASE_DEPNODE(gtid, sink);
  }
  return npredecessors;
}

static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
                      bool dep_barrier, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
                "dep_barrier = %d\n",
                gtid, dep_barrier));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;

  // process previous omp_all_memory node if any
  npredecessors +=
      __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
  __kmp_node_deref(thread, h->last_all);
  if (!dep_barrier) {
    h->last_all = __kmp_node_ref(node);
  } else {
    // if this is a sync point in the serial sequence, then the previous
    // outputs are guaranteed to be completed after the execution of this
    // task so the previous output nodes can be cleared.
    h->last_all = NULL;
  }

  // process all regular dependences
  for (size_t i = 0; i < h->size; i++) {
    kmp_dephash_entry_t *info = h->buckets[i];
    if (!info) // skip empty slots in dephash
      continue;
    for (; info; info = info->next_in_bucket) {
      // for each entry the omp_all_memory works as OUT dependence
      kmp_depnode_t *last_out = info->last_out;
      kmp_depnode_list_t *last_set = info->last_set;
      kmp_depnode_list_t *prev_set = info->prev_set;
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        info->last_out = NULL;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
                npredecessors));
  return npredecessors;
}

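// Note: in __kmp_process_deps the 'filter' template parameter is true for the
// possibly-aliased dep_list, whose duplicate entries may have been voided
// (base_addr == 0) by __kmp_check_deps, and false for the no-alias list. The
// function links 'node' behind every predecessor recorded in the hash for each
// address and returns the number of predecessor edges created.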
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;
    kmp_depnode_list_t *last_set = info->last_set;
    kmp_depnode_list_t *prev_set = info->prev_set;

    if (dep->flags.out) { // out or inout --> clean lists if any
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        // if this is a sync point in the serial sequence, then the previous
        // outputs are guaranteed to be completed after the execution of this
        // task so the previous output nodes can be cleared.
        info->last_out = NULL;
      }
    } else { // either IN or MTX or SET
      if (info->last_flag == 0 || info->last_flag == dep->flag) {
        // last_set either didn't exist or of same dep kind
        // link node as successor of the last_out if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
        // link node as successor of all nodes in the prev_set if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
        if (dep_barrier) {
          // clean last_out and prev_set if any; don't touch last_set
          __kmp_node_deref(thread, last_out);
          info->last_out = NULL;
          __kmp_depnode_list_free(thread, prev_set);
          info->prev_set = NULL;
        }
      } else { // last_set is of different dep kind, make it prev_set
        // link node as successor of all nodes in the last_set
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        // clean last_out if any
        __kmp_node_deref(thread, last_out);
        info->last_out = NULL;
        // clean prev_set if any
        __kmp_depnode_list_free(thread, prev_set);
        if (!dep_barrier) {
          // move last_set to prev_set, new last_set will be allocated
          info->prev_set = last_set;
        } else {
          info->prev_set = NULL;
          info->last_flag = 0;
        }
        info->last_set = NULL;
      }
      // for dep_barrier last_flag value should remain:
      // 0 if last_set is empty, unchanged otherwise
      if (!dep_barrier) {
        info->last_flag = dep->flag; // store dep kind of the last_set
        info->last_set = __kmp_add_node(thread, info->last_set, node);
      }
      // check if we are processing MTX dependency
      if (dep->flag == KMP_DEP_MTX) {
        if (info->mtx_lock == NULL) {
          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
          __kmp_init_lock(info->mtx_lock);
        }
        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
        kmp_int32 m;
        // Save lock in node's array
        for (m = 0; m < MAX_MTX_DEPS; ++m) {
          // sort pointers in decreasing order to avoid potential livelock
          if (node->dn.mtx_locks[m] < info->mtx_lock) {
            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
            for (int n = node->dn.mtx_num_locks; n > m; --n) {
              // shift right all lesser non-NULL pointers
              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
            }
            node->dn.mtx_locks[m] = info->mtx_lock;
            break;
          }
        }
        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
        node->dn.mtx_num_locks++;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));
  return npredecessors;
}

#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

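// Note: __kmp_check_deps first prunes dep_list (duplicate addresses are voided
// and conflicting kinds widened to OUT, mutexinoutset entries beyond
// MAX_MTX_DEPS are downgraded, omp_all_memory is detected), then processes
// both dependence lists and finally publishes the accumulated npredecessors
// count with a single atomic add.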
// returns true if the task has any outstanding dependence
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t **hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i, n_mtxs = 0, dep_all = 0;
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
                "possibly aliased dependences, %d non-aliased dependences : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter deps in dep_list
  // TODO: Different algorithm for large dep_list ( > 10 ? )
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0 &&
        dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
      KMP_DEBUG_ASSERT(
          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
          dep_list[i].flag == KMP_DEP_INOUT ||
          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
      for (int j = i + 1; j < ndeps; j++) {
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          if (dep_list[i].flag != dep_list[j].flag) {
            // two different dependences on same address work identical to OUT
            dep_list[i].flag = KMP_DEP_OUT;
          }
          dep_list[j].base_addr = 0; // Mark j element as void
        }
      }
      if (dep_list[i].flag == KMP_DEP_MTX) {
        // limit number of mtx deps to MAX_MTX_DEPS per node
        if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
          ++n_mtxs;
        } else {
          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
        }
      }
    } else if (dep_list[i].flag == KMP_DEP_ALL ||
               dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
      // omp_all_memory dependence can be marked by compiler by either
      // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
      // omp_all_memory overrides all other dependences if any
      dep_all = 1;
      break;
    }
  }

  // doesn't need to be atomic as no other thread is going to be accessing this
  // node just yet.
  // npredecessors is set -1 to ensure that none of the releasing tasks queues
  // this task before we have finished processing all the dependences
  node->dn.npredecessors = -1;

  // used to pack all npredecessors additions into a single atomic operation at
  // the end
  int npredecessors;

  if (!dep_all) { // regular dependences
    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
                                             ndeps, dep_list, task);
    npredecessors += __kmp_process_deps<false>(
        gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
  } else { // omp_all_memory dependence
    npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
  }

  node->dn.task = task;
  KMP_MB();

  // Account for our initial fake value
  npredecessors++;

  // Update predecessors and obtain current value to check if there are still
  // any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task...
  return npredecessors > 0 ? true : false;
}

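// Illustrative sketch (not taken from any particular compiler): a construct
// such as
//   #pragma omp task depend(in : x) depend(out : y)
// is lowered to something along the lines of
//   kmp_depend_info_t deps[2];
//   deps[0].base_addr = (kmp_intptr_t)&x;
//   deps[0].flags.in = 1;
//   deps[1].base_addr = (kmp_intptr_t)&y;
//   deps[1].flags.out = 1;
//   __kmpc_omp_task_with_deps(loc, gtid, new_task, 2, deps, 0, NULL);
// with new_task previously created by the task allocation entry point and the
// remaining kmp_depend_info_t fields (e.g. the length) filled in as well.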
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued

Schedule a non-thread-switchable task with dependences for execution
*/
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPX_TASKGRAPH
  // record TDG with deps
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // extend record_map if needed
    if (new_taskdata->td_task_id >= tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      if (new_taskdata->td_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));
        KMP_MEMCPY(new_record, tdg->record_map,
                   old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        __kmp_free(old_record);

        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
        }
        // update the size at the end, so that we avoid other
        // threads use old_record while map_size is already updated
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    tdg->record_map[new_taskdata->td_task_id].task = new_task;
    tdg->record_map[new_taskdata->td_task_id].parent_task =
        new_taskdata->td_parent;
    KMP_ATOMIC_INC(&tdg->num_tasks);
  }
#endif
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].base_addr == KMP_SIZE_T_MAX)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
      else if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
      else if (dep_list[i].flags.all)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].base_addr == KMP_SIZE_T_MAX)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
      else if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
      else if (noalias_dep_list[i].flags.all)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial &&
           !(task_team && (task_team->tt.tt_found_proxy_tasks ||
                           task_team->tt.tt_hidden_helper_task_encountered));

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependences have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependences: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependences : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return ret;
}

#if OMPT_SUPPORT
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
                                ompt_data_t *taskwait_task_data) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        taskwait_task_data, ompt_taskwait_complete, NULL);
  }
  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
  *taskwait_task_data = ompt_data_none;
}
#endif /* OMPT_SUPPORT */

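// Illustrative sketch (hypothetical lowering): "#pragma omp taskwait
// depend(in : x)" becomes a call such as
//   __kmpc_omp_wait_deps(loc, gtid, 1, deps, 0, NULL);
// which, unlike __kmpc_omp_task_with_deps, does not defer a new task but
// blocks the encountering task until the listed dependences are fulfilled.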
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

Blocks the current task until all specified dependences have been fulfilled.
*/
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  __kmpc_omp_taskwait_deps_51(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
                              noalias_dep_list, false);
}

/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
   Placeholder for taskwait with nowait clause.
   Earlier code of __kmpc_omp_wait_deps() is now
   in this function.
*/
void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
                                 kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                                 kmp_int32 ndeps_noalias,
                                 kmp_depend_info_t *noalias_dep_list,
                                 kmp_int32 has_no_wait) {
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",
                gtid, loc_ref, has_no_wait));
  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  // this function represents a taskwait construct with depend clause
  // We signal 4 events:
  //  - creation of the taskwait task
  //  - dependences of the taskwait task
  //  - schedule and finish of the taskwait task
  ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
  KMP_ASSERT(taskwait_task_data->ptr == NULL);
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame), taskwait_task_data,
          ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        taskwait_task_data, ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
    ompt_deps = NULL;
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  // We can return immediately as:
  // - dependences are not computed in serial teams (except with proxy tasks)
  // - if the dephash is not yet created it means we have nothing to wait for
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  ignore =
      ignore && thread->th.th_task_team != NULL &&
      thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
      thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
  ignore = ignore || current_task->td_dephash == NULL;

  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

  kmp_depnode_t node = {0};
  __kmp_init_node(&node);

  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

  int thread_finished = FALSE;
  kmp_flag_32<false, false> flag(
      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

  // Wait until the last __kmp_release_deps is finished before we free the
  // current stack frame holding the "node" variable; once its nrefs count
  // reaches 1, we're sure nobody else can try to reference it again.
  while (node.dn.nrefs > 1)
    KMP_YIELD(TRUE);

#if OMPT_SUPPORT
  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : "
                "loc=%p\n",
                gtid, loc_ref));
}