xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_barrier.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_barrier.cpp
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "kmp_wait_release.h"
14349cc55cSDimitry Andric #include "kmp_barrier.h"
150b57cec5SDimitry Andric #include "kmp_itt.h"
160b57cec5SDimitry Andric #include "kmp_os.h"
170b57cec5SDimitry Andric #include "kmp_stats.h"
180b57cec5SDimitry Andric #include "ompt-specific.h"
19349cc55cSDimitry Andric // for distributed barrier
20349cc55cSDimitry Andric #include "kmp_affinity.h"
210b57cec5SDimitry Andric 
#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#if KMP_MIC && USE_NGO_STORES
// ICV copying
// On KMP_MIC, use 512-bit non-globally-ordered (NGO) stores to copy a
// cache line of ICVs / go flags: load once into Vt, then stream it out.
// ngo_sync() issues a locked RMW on the stack as a full store fence so the
// NGO stores become visible before subsequent ordinary stores.
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
// Non-MIC fallbacks: plain ICV copy / cache-line memcpy, and no-op sync.
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric void __kmp_print_structure(void); // Forward declaration
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric // ---------------------------- Barrier Algorithms ----------------------------
43349cc55cSDimitry Andric // Distributed barrier
44349cc55cSDimitry Andric 
45349cc55cSDimitry Andric // Compute how many threads to have polling each cache-line.
46349cc55cSDimitry Andric // We want to limit the number of writes to IDEAL_GO_RESOLUTION.
47349cc55cSDimitry Andric void distributedBarrier::computeVarsForN(size_t n) {
48349cc55cSDimitry Andric   int nsockets = 1;
49349cc55cSDimitry Andric   if (__kmp_topology) {
50349cc55cSDimitry Andric     int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
51349cc55cSDimitry Andric     int core_level = __kmp_topology->get_level(KMP_HW_CORE);
52349cc55cSDimitry Andric     int ncores_per_socket =
53349cc55cSDimitry Andric         __kmp_topology->calculate_ratio(core_level, socket_level);
54349cc55cSDimitry Andric     nsockets = __kmp_topology->get_count(socket_level);
55349cc55cSDimitry Andric 
56349cc55cSDimitry Andric     if (nsockets <= 0)
57349cc55cSDimitry Andric       nsockets = 1;
58349cc55cSDimitry Andric     if (ncores_per_socket <= 0)
59349cc55cSDimitry Andric       ncores_per_socket = 1;
60349cc55cSDimitry Andric 
61349cc55cSDimitry Andric     threads_per_go = ncores_per_socket >> 1;
62349cc55cSDimitry Andric     if (!fix_threads_per_go) {
63349cc55cSDimitry Andric       // Minimize num_gos
64349cc55cSDimitry Andric       if (threads_per_go > 4) {
65349cc55cSDimitry Andric         if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
66349cc55cSDimitry Andric           threads_per_go = threads_per_go >> 1;
67349cc55cSDimitry Andric         }
68349cc55cSDimitry Andric         if (threads_per_go > 4 && nsockets == 1)
69349cc55cSDimitry Andric           threads_per_go = threads_per_go >> 1;
70349cc55cSDimitry Andric       }
71349cc55cSDimitry Andric     }
72349cc55cSDimitry Andric     if (threads_per_go == 0)
73349cc55cSDimitry Andric       threads_per_go = 1;
74349cc55cSDimitry Andric     fix_threads_per_go = true;
75349cc55cSDimitry Andric     num_gos = n / threads_per_go;
76349cc55cSDimitry Andric     if (n % threads_per_go)
77349cc55cSDimitry Andric       num_gos++;
78349cc55cSDimitry Andric     if (nsockets == 1 || num_gos == 1)
79349cc55cSDimitry Andric       num_groups = 1;
80349cc55cSDimitry Andric     else {
81349cc55cSDimitry Andric       num_groups = num_gos / nsockets;
82349cc55cSDimitry Andric       if (num_gos % nsockets)
83349cc55cSDimitry Andric         num_groups++;
84349cc55cSDimitry Andric     }
85349cc55cSDimitry Andric     if (num_groups <= 0)
86349cc55cSDimitry Andric       num_groups = 1;
87349cc55cSDimitry Andric     gos_per_group = num_gos / num_groups;
88349cc55cSDimitry Andric     if (num_gos % num_groups)
89349cc55cSDimitry Andric       gos_per_group++;
90349cc55cSDimitry Andric     threads_per_group = threads_per_go * gos_per_group;
91349cc55cSDimitry Andric   } else {
92349cc55cSDimitry Andric     num_gos = n / threads_per_go;
93349cc55cSDimitry Andric     if (n % threads_per_go)
94349cc55cSDimitry Andric       num_gos++;
95349cc55cSDimitry Andric     if (num_gos == 1)
96349cc55cSDimitry Andric       num_groups = 1;
97349cc55cSDimitry Andric     else {
98349cc55cSDimitry Andric       num_groups = num_gos / 2;
99349cc55cSDimitry Andric       if (num_gos % 2)
100349cc55cSDimitry Andric         num_groups++;
101349cc55cSDimitry Andric     }
102349cc55cSDimitry Andric     gos_per_group = num_gos / num_groups;
103349cc55cSDimitry Andric     if (num_gos % num_groups)
104349cc55cSDimitry Andric       gos_per_group++;
105349cc55cSDimitry Andric     threads_per_group = threads_per_go * gos_per_group;
106349cc55cSDimitry Andric   }
107349cc55cSDimitry Andric }
108349cc55cSDimitry Andric 
109349cc55cSDimitry Andric void distributedBarrier::computeGo(size_t n) {
110349cc55cSDimitry Andric   // Minimize num_gos
111349cc55cSDimitry Andric   for (num_gos = 1;; num_gos++)
112349cc55cSDimitry Andric     if (IDEAL_CONTENTION * num_gos >= n)
113349cc55cSDimitry Andric       break;
114349cc55cSDimitry Andric   threads_per_go = n / num_gos;
115349cc55cSDimitry Andric   if (n % num_gos)
116349cc55cSDimitry Andric     threads_per_go++;
117349cc55cSDimitry Andric   while (num_gos > MAX_GOS) {
118349cc55cSDimitry Andric     threads_per_go++;
119349cc55cSDimitry Andric     num_gos = n / threads_per_go;
120349cc55cSDimitry Andric     if (n % threads_per_go)
121349cc55cSDimitry Andric       num_gos++;
122349cc55cSDimitry Andric   }
123349cc55cSDimitry Andric   computeVarsForN(n);
124349cc55cSDimitry Andric }
125349cc55cSDimitry Andric 
126349cc55cSDimitry Andric // This function is to resize the barrier arrays when the new number of threads
127349cc55cSDimitry Andric // exceeds max_threads, which is the current size of all the arrays
128349cc55cSDimitry Andric void distributedBarrier::resize(size_t nthr) {
129349cc55cSDimitry Andric   KMP_DEBUG_ASSERT(nthr > max_threads);
130349cc55cSDimitry Andric 
131349cc55cSDimitry Andric   // expand to requested size * 2
132349cc55cSDimitry Andric   max_threads = nthr * 2;
133349cc55cSDimitry Andric 
134349cc55cSDimitry Andric   // allocate arrays to new max threads
135349cc55cSDimitry Andric   for (int i = 0; i < MAX_ITERS; ++i) {
136349cc55cSDimitry Andric     if (flags[i])
137349cc55cSDimitry Andric       flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
138349cc55cSDimitry Andric                                                  max_threads * sizeof(flags_s));
139349cc55cSDimitry Andric     else
140349cc55cSDimitry Andric       flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
141349cc55cSDimitry Andric   }
142349cc55cSDimitry Andric 
143349cc55cSDimitry Andric   if (go)
144349cc55cSDimitry Andric     go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
145349cc55cSDimitry Andric   else
146349cc55cSDimitry Andric     go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));
147349cc55cSDimitry Andric 
148349cc55cSDimitry Andric   if (iter)
149349cc55cSDimitry Andric     iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
150349cc55cSDimitry Andric   else
151349cc55cSDimitry Andric     iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));
152349cc55cSDimitry Andric 
153349cc55cSDimitry Andric   if (sleep)
154349cc55cSDimitry Andric     sleep =
155349cc55cSDimitry Andric         (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
156349cc55cSDimitry Andric   else
157349cc55cSDimitry Andric     sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
158349cc55cSDimitry Andric }
159349cc55cSDimitry Andric 
160349cc55cSDimitry Andric // This function is to set all the go flags that threads might be waiting
161349cc55cSDimitry Andric // on, and when blocktime is not infinite, it should be followed by a wake-up
162349cc55cSDimitry Andric // call to each thread
163349cc55cSDimitry Andric kmp_uint64 distributedBarrier::go_release() {
164349cc55cSDimitry Andric   kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
165349cc55cSDimitry Andric   for (size_t j = 0; j < num_gos; j++) {
166349cc55cSDimitry Andric     go[j].go.store(next_go);
167349cc55cSDimitry Andric   }
168349cc55cSDimitry Andric   return next_go;
169349cc55cSDimitry Andric }
170349cc55cSDimitry Andric 
171349cc55cSDimitry Andric void distributedBarrier::go_reset() {
172349cc55cSDimitry Andric   for (size_t j = 0; j < max_threads; ++j) {
173349cc55cSDimitry Andric     for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
174349cc55cSDimitry Andric       flags[i][j].stillNeed = 1;
175349cc55cSDimitry Andric     }
176349cc55cSDimitry Andric     go[j].go.store(0);
177349cc55cSDimitry Andric     iter[j].iter = 0;
178349cc55cSDimitry Andric   }
179349cc55cSDimitry Andric }
180349cc55cSDimitry Andric 
181349cc55cSDimitry Andric // This function inits/re-inits the distributed barrier for a particular number
182349cc55cSDimitry Andric // of threads. If a resize of arrays is needed, it calls the resize function.
183349cc55cSDimitry Andric void distributedBarrier::init(size_t nthr) {
184349cc55cSDimitry Andric   size_t old_max = max_threads;
185349cc55cSDimitry Andric   if (nthr > max_threads) { // need more space in arrays
186349cc55cSDimitry Andric     resize(nthr);
187349cc55cSDimitry Andric   }
188349cc55cSDimitry Andric 
189349cc55cSDimitry Andric   for (size_t i = 0; i < max_threads; i++) {
190349cc55cSDimitry Andric     for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
191349cc55cSDimitry Andric       flags[j][i].stillNeed = 1;
192349cc55cSDimitry Andric     }
193349cc55cSDimitry Andric     go[i].go.store(0);
194349cc55cSDimitry Andric     iter[i].iter = 0;
195349cc55cSDimitry Andric     if (i >= old_max)
196349cc55cSDimitry Andric       sleep[i].sleep = false;
197349cc55cSDimitry Andric   }
198349cc55cSDimitry Andric 
199349cc55cSDimitry Andric   // Recalculate num_gos, etc. based on new nthr
200349cc55cSDimitry Andric   computeVarsForN(nthr);
201349cc55cSDimitry Andric 
202349cc55cSDimitry Andric   num_threads = nthr;
203349cc55cSDimitry Andric 
204349cc55cSDimitry Andric   if (team_icvs == NULL)
205349cc55cSDimitry Andric     team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
206349cc55cSDimitry Andric }
207349cc55cSDimitry Andric 
208349cc55cSDimitry Andric // This function is used only when KMP_BLOCKTIME is not infinite.
209349cc55cSDimitry Andric // static
210349cc55cSDimitry Andric void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
211349cc55cSDimitry Andric                                size_t start, size_t stop, size_t inc,
212349cc55cSDimitry Andric                                size_t tid) {
213349cc55cSDimitry Andric   KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
214349cc55cSDimitry Andric   if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
215349cc55cSDimitry Andric     return;
216349cc55cSDimitry Andric 
217349cc55cSDimitry Andric   kmp_info_t **other_threads = team->t.t_threads;
218349cc55cSDimitry Andric   for (size_t thr = start; thr < stop; thr += inc) {
219349cc55cSDimitry Andric     KMP_DEBUG_ASSERT(other_threads[thr]);
220349cc55cSDimitry Andric     int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
221349cc55cSDimitry Andric     // Wake up worker regardless of if it appears to be sleeping or not
222349cc55cSDimitry Andric     __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
223349cc55cSDimitry Andric   }
224349cc55cSDimitry Andric }
225349cc55cSDimitry Andric 
// Gather phase of the distributed barrier. Threads are partitioned into
// groups of b->threads_per_group; each group leader (tid is a multiple of
// threads_per_group) spins until the workers of its group clear their
// stillNeed flags, then all leaders' flags are awaited by every leader.
// While spinning, threads may execute tasks. If a reduction callback is
// given, leaders reduce their group and the primary thread (tid 0) reduces
// across leaders. The stillNeed flag arrays cycle through MAX_ITERS
// iterations so a flag for the next barrier can be armed before the current
// one is cleared.
static void __kmp_dist_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_info_t **other_threads;
  kmp_uint64 my_current_iter, my_next_iter;
  kmp_uint32 nproc;
  bool group_leader;

  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;
  b = team->t.b;
  // Current and next slot in the cyclic per-iteration flag arrays.
  my_current_iter = b->iter[tid].iter;
  my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
  group_leader = ((tid % b->threads_per_group) == 0);

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
            gtid, team->t.t_id, tid, bt));

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif

  if (group_leader) {
    // Start from the thread after the group leader
    size_t group_start = tid + 1;
    size_t group_end = tid + b->threads_per_group;
    size_t threads_pending = 0;

    // The last group may be smaller than threads_per_group.
    if (group_end > nproc)
      group_end = nproc;
    do { // wait for threads in my group
      threads_pending = 0;
      // Check all the flags every time to avoid branch mispredict
      for (size_t thr = group_start; thr < group_end; thr++) {
        // Each thread uses a different cache line
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } // if
      }
      // Bail out of the spin if the runtime is shutting down.
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);

    if (reduce) { // Perform reduction if needed
      OMPT_REDUCTION_DECL(this_thr, gtid);
      OMPT_REDUCTION_BEGIN;
      // Group leader reduces all threads in group
      for (size_t thr = group_start; thr < group_end; thr++) {
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[thr]->th.th_local.reduce_data);
      }
      OMPT_REDUCTION_END;
    }

    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; resets stillNeed to 0 to
    // indicate it has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;

    do { // wait for all group leaders
      threads_pending = 0;
      // Leaders sit at tids that are multiples of threads_per_group.
      for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } // if
      }
      // Bail out of the spin if the runtime is shutting down.
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);

    if (reduce) { // Perform reduction if needed
      if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        for (size_t thr = b->threads_per_group; thr < nproc;
             thr += b->threads_per_group) {
          (*reduce)(this_thr->th.th_local.reduce_data,
                    other_threads[thr]->th.th_local.reduce_data);
        }
        OMPT_REDUCTION_END;
      }
    }
  } else {
    // Non-leader worker: just announce arrival and let the leader wait.
    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; resets stillNeed to 0 to
    // indicate it has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;
  }

  // Full fence so the flag stores above are globally visible before the
  // release phase proceeds.
  KMP_MFENCE();

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
374349cc55cSDimitry Andric 
375349cc55cSDimitry Andric static void __kmp_dist_barrier_release(
376349cc55cSDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
377349cc55cSDimitry Andric     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
378349cc55cSDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
379349cc55cSDimitry Andric   kmp_team_t *team;
380349cc55cSDimitry Andric   distributedBarrier *b;
381349cc55cSDimitry Andric   kmp_bstate_t *thr_bar;
382349cc55cSDimitry Andric   kmp_uint64 my_current_iter, next_go;
383349cc55cSDimitry Andric   size_t my_go_index;
384349cc55cSDimitry Andric   bool group_leader;
385349cc55cSDimitry Andric 
386349cc55cSDimitry Andric   KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
387349cc55cSDimitry Andric                 gtid, tid, bt));
388349cc55cSDimitry Andric 
389349cc55cSDimitry Andric   thr_bar = &this_thr->th.th_bar[bt].bb;
390349cc55cSDimitry Andric 
391349cc55cSDimitry Andric   if (!KMP_MASTER_TID(tid)) {
392349cc55cSDimitry Andric     // workers and non-master group leaders need to check their presence in team
393349cc55cSDimitry Andric     do {
394349cc55cSDimitry Andric       if (this_thr->th.th_used_in_team.load() != 1 &&
395349cc55cSDimitry Andric           this_thr->th.th_used_in_team.load() != 3) {
396349cc55cSDimitry Andric         // Thread is not in use in a team. Wait on location in tid's thread
397349cc55cSDimitry Andric         // struct. The 0 value tells anyone looking that this thread is spinning
398349cc55cSDimitry Andric         // or sleeping until this location becomes 3 again; 3 is the transition
399349cc55cSDimitry Andric         // state to get to 1 which is waiting on go and being in the team
400349cc55cSDimitry Andric         kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
401349cc55cSDimitry Andric         if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
402349cc55cSDimitry Andric                                         0) ||
403349cc55cSDimitry Andric             this_thr->th.th_used_in_team.load() == 0) {
404349cc55cSDimitry Andric           my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
405349cc55cSDimitry Andric         }
406349cc55cSDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
407349cc55cSDimitry Andric         if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
408349cc55cSDimitry Andric           // In fork barrier where we could not get the object reliably
409349cc55cSDimitry Andric           itt_sync_obj =
410349cc55cSDimitry Andric               __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
411349cc55cSDimitry Andric           // Cancel wait on previous parallel region...
412349cc55cSDimitry Andric           __kmp_itt_task_starting(itt_sync_obj);
413349cc55cSDimitry Andric 
414349cc55cSDimitry Andric           if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
415349cc55cSDimitry Andric             return;
416349cc55cSDimitry Andric 
417349cc55cSDimitry Andric           itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
418349cc55cSDimitry Andric           if (itt_sync_obj != NULL)
419349cc55cSDimitry Andric             // Call prepare as early as possible for "new" barrier
420349cc55cSDimitry Andric             __kmp_itt_task_finished(itt_sync_obj);
421349cc55cSDimitry Andric         } else
422349cc55cSDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
423349cc55cSDimitry Andric             if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
424349cc55cSDimitry Andric           return;
425349cc55cSDimitry Andric       }
426349cc55cSDimitry Andric       if (this_thr->th.th_used_in_team.load() != 1 &&
427349cc55cSDimitry Andric           this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
428349cc55cSDimitry Andric         continue;
429349cc55cSDimitry Andric       if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
430349cc55cSDimitry Andric         return;
431349cc55cSDimitry Andric 
432349cc55cSDimitry Andric       // At this point, the thread thinks it is in use in a team, or in
433349cc55cSDimitry Andric       // transition to be used in a team, but it might have reached this barrier
434349cc55cSDimitry Andric       // before it was marked unused by the team. Unused threads are awoken and
435349cc55cSDimitry Andric       // shifted to wait on local thread struct elsewhere. It also might reach
436349cc55cSDimitry Andric       // this point by being picked up for use by a different team. Either way,
437349cc55cSDimitry Andric       // we need to update the tid.
438349cc55cSDimitry Andric       tid = __kmp_tid_from_gtid(gtid);
439349cc55cSDimitry Andric       team = this_thr->th.th_team;
440349cc55cSDimitry Andric       KMP_DEBUG_ASSERT(tid >= 0);
441349cc55cSDimitry Andric       KMP_DEBUG_ASSERT(team);
442349cc55cSDimitry Andric       b = team->t.b;
443349cc55cSDimitry Andric       my_current_iter = b->iter[tid].iter;
444349cc55cSDimitry Andric       next_go = my_current_iter + distributedBarrier::MAX_ITERS;
445349cc55cSDimitry Andric       my_go_index = tid / b->threads_per_go;
446349cc55cSDimitry Andric       if (this_thr->th.th_used_in_team.load() == 3) {
447349cc55cSDimitry Andric         KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
448349cc55cSDimitry Andric       }
449349cc55cSDimitry Andric       // Check if go flag is set
450349cc55cSDimitry Andric       if (b->go[my_go_index].go.load() != next_go) {
451349cc55cSDimitry Andric         // Wait on go flag on team
452349cc55cSDimitry Andric         kmp_atomic_flag_64<false, true> my_flag(
453349cc55cSDimitry Andric             &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
454349cc55cSDimitry Andric         my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
455349cc55cSDimitry Andric         KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
456349cc55cSDimitry Andric                          b->iter[tid].iter == 0);
457349cc55cSDimitry Andric         KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
458349cc55cSDimitry Andric       }
459349cc55cSDimitry Andric 
460349cc55cSDimitry Andric       if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
461349cc55cSDimitry Andric         return;
462349cc55cSDimitry Andric       // At this point, the thread's go location was set. This means the primary
463349cc55cSDimitry Andric       // thread is safely in the barrier, and so this thread's data is
464349cc55cSDimitry Andric       // up-to-date, but we should check again that this thread is really in
465349cc55cSDimitry Andric       // use in the team, as it could have been woken up for the purpose of
466349cc55cSDimitry Andric       // changing team size, or reaping threads at shutdown.
467349cc55cSDimitry Andric       if (this_thr->th.th_used_in_team.load() == 1)
468349cc55cSDimitry Andric         break;
469349cc55cSDimitry Andric     } while (1);
470349cc55cSDimitry Andric 
471349cc55cSDimitry Andric     if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
472349cc55cSDimitry Andric       return;
473349cc55cSDimitry Andric 
474349cc55cSDimitry Andric     group_leader = ((tid % b->threads_per_group) == 0);
475349cc55cSDimitry Andric     if (group_leader) {
476349cc55cSDimitry Andric       // Tell all the threads in my group they can go!
477349cc55cSDimitry Andric       for (size_t go_idx = my_go_index + 1;
478349cc55cSDimitry Andric            go_idx < my_go_index + b->gos_per_group; go_idx++) {
479349cc55cSDimitry Andric         b->go[go_idx].go.store(next_go);
480349cc55cSDimitry Andric       }
481349cc55cSDimitry Andric       // Fence added so that workers can see changes to go. sfence inadequate.
482349cc55cSDimitry Andric       KMP_MFENCE();
483349cc55cSDimitry Andric     }
484349cc55cSDimitry Andric 
485349cc55cSDimitry Andric #if KMP_BARRIER_ICV_PUSH
486349cc55cSDimitry Andric     if (propagate_icvs) { // copy ICVs to final dest
487349cc55cSDimitry Andric       __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
488349cc55cSDimitry Andric                                tid, FALSE);
489349cc55cSDimitry Andric       copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
490349cc55cSDimitry Andric                 (kmp_internal_control_t *)team->t.b->team_icvs);
491349cc55cSDimitry Andric       copy_icvs(&thr_bar->th_fixed_icvs,
492349cc55cSDimitry Andric                 &team->t.t_implicit_task_taskdata[tid].td_icvs);
493349cc55cSDimitry Andric     }
494349cc55cSDimitry Andric #endif
495349cc55cSDimitry Andric     if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
496349cc55cSDimitry Andric       // This thread is now awake and participating in the barrier;
497349cc55cSDimitry Andric       // wake up the other threads in the group
498349cc55cSDimitry Andric       size_t nproc = this_thr->th.th_team_nproc;
499349cc55cSDimitry Andric       size_t group_end = tid + b->threads_per_group;
500349cc55cSDimitry Andric       if (nproc < group_end)
501349cc55cSDimitry Andric         group_end = nproc;
502349cc55cSDimitry Andric       __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
503349cc55cSDimitry Andric     }
504349cc55cSDimitry Andric   } else { //  Primary thread
505349cc55cSDimitry Andric     team = this_thr->th.th_team;
506349cc55cSDimitry Andric     b = team->t.b;
507349cc55cSDimitry Andric     my_current_iter = b->iter[tid].iter;
508349cc55cSDimitry Andric     next_go = my_current_iter + distributedBarrier::MAX_ITERS;
509349cc55cSDimitry Andric #if KMP_BARRIER_ICV_PUSH
510349cc55cSDimitry Andric     if (propagate_icvs) {
511349cc55cSDimitry Andric       // primary thread has ICVs in final destination; copy
512349cc55cSDimitry Andric       copy_icvs(&thr_bar->th_fixed_icvs,
513349cc55cSDimitry Andric                 &team->t.t_implicit_task_taskdata[tid].td_icvs);
514349cc55cSDimitry Andric     }
515349cc55cSDimitry Andric #endif
516349cc55cSDimitry Andric     // Tell all the group leaders they can go!
517349cc55cSDimitry Andric     for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
518349cc55cSDimitry Andric       b->go[go_idx].go.store(next_go);
519349cc55cSDimitry Andric     }
520349cc55cSDimitry Andric 
521349cc55cSDimitry Andric     if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
522349cc55cSDimitry Andric       // Wake-up the group leaders
523349cc55cSDimitry Andric       size_t nproc = this_thr->th.th_team_nproc;
524349cc55cSDimitry Andric       __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
525349cc55cSDimitry Andric                                 b->threads_per_group, tid);
526349cc55cSDimitry Andric     }
527349cc55cSDimitry Andric 
528349cc55cSDimitry Andric     // Tell all the threads in my group they can go!
529349cc55cSDimitry Andric     for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
530349cc55cSDimitry Andric       b->go[go_idx].go.store(next_go);
531349cc55cSDimitry Andric     }
532349cc55cSDimitry Andric 
533349cc55cSDimitry Andric     // Fence added so that workers can see changes to go. sfence inadequate.
534349cc55cSDimitry Andric     KMP_MFENCE();
535349cc55cSDimitry Andric 
536349cc55cSDimitry Andric     if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
537349cc55cSDimitry Andric       // Wake-up the other threads in my group
538349cc55cSDimitry Andric       size_t nproc = this_thr->th.th_team_nproc;
539349cc55cSDimitry Andric       size_t group_end = tid + b->threads_per_group;
540349cc55cSDimitry Andric       if (nproc < group_end)
541349cc55cSDimitry Andric         group_end = nproc;
542349cc55cSDimitry Andric       __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
543349cc55cSDimitry Andric     }
544349cc55cSDimitry Andric   }
545349cc55cSDimitry Andric   // Update to next iteration
546349cc55cSDimitry Andric   KMP_ASSERT(my_current_iter == b->iter[tid].iter);
547349cc55cSDimitry Andric   b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;
548349cc55cSDimitry Andric 
549349cc55cSDimitry Andric   KA_TRACE(
550349cc55cSDimitry Andric       20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
551349cc55cSDimitry Andric            gtid, team->t.t_id, tid, bt));
552349cc55cSDimitry Andric }
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric // Linear Barrier
5550b57cec5SDimitry Andric template <bool cancellable = false>
5560b57cec5SDimitry Andric static bool __kmp_linear_barrier_gather_template(
5570b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
5580b57cec5SDimitry Andric     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
5590b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
5600b57cec5SDimitry Andric   kmp_team_t *team = this_thr->th.th_team;
5610b57cec5SDimitry Andric   kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
5620b57cec5SDimitry Andric   kmp_info_t **other_threads = team->t.t_threads;
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric   KA_TRACE(
5650b57cec5SDimitry Andric       20,
5660b57cec5SDimitry Andric       ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
5670b57cec5SDimitry Andric        gtid, team->t.t_id, tid, bt));
5680b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
5710b57cec5SDimitry Andric   // Barrier imbalance - save arrive time to the thread
5720b57cec5SDimitry Andric   if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
5730b57cec5SDimitry Andric     this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
5740b57cec5SDimitry Andric         __itt_get_timestamp();
5750b57cec5SDimitry Andric   }
5760b57cec5SDimitry Andric #endif
5770b57cec5SDimitry Andric   // We now perform a linear reduction to signal that all of the threads have
5780b57cec5SDimitry Andric   // arrived.
5790b57cec5SDimitry Andric   if (!KMP_MASTER_TID(tid)) {
5800b57cec5SDimitry Andric     KA_TRACE(20,
5810b57cec5SDimitry Andric              ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
5820b57cec5SDimitry Andric               "arrived(%p): %llu => %llu\n",
5830b57cec5SDimitry Andric               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
5840b57cec5SDimitry Andric               team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
5850b57cec5SDimitry Andric               thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
586fe6060f1SDimitry Andric     // Mark arrival to primary thread
5870b57cec5SDimitry Andric     /* After performing this write, a worker thread may not assume that the team
588fe6060f1SDimitry Andric        is valid any more - it could be deallocated by the primary thread at any
5890b57cec5SDimitry Andric        time. */
590e8d8bef9SDimitry Andric     kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
5910b57cec5SDimitry Andric     flag.release();
5920b57cec5SDimitry Andric   } else {
5930b57cec5SDimitry Andric     kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
5940b57cec5SDimitry Andric     int nproc = this_thr->th.th_team_nproc;
5950b57cec5SDimitry Andric     int i;
5960b57cec5SDimitry Andric     // Don't have to worry about sleep bit here or atomic since team setting
5970b57cec5SDimitry Andric     kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
5980b57cec5SDimitry Andric 
5990b57cec5SDimitry Andric     // Collect all the worker team member threads.
6000b57cec5SDimitry Andric     for (i = 1; i < nproc; ++i) {
6010b57cec5SDimitry Andric #if KMP_CACHE_MANAGE
6020b57cec5SDimitry Andric       // Prefetch next thread's arrived count
6030b57cec5SDimitry Andric       if (i + 1 < nproc)
6040b57cec5SDimitry Andric         KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
6050b57cec5SDimitry Andric #endif /* KMP_CACHE_MANAGE */
6060b57cec5SDimitry Andric       KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
6070b57cec5SDimitry Andric                     "arrived(%p) == %llu\n",
6080b57cec5SDimitry Andric                     gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
6090b57cec5SDimitry Andric                     team->t.t_id, i,
6100b57cec5SDimitry Andric                     &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric       // Wait for worker thread to arrive
6130b57cec5SDimitry Andric       if (cancellable) {
614e8d8bef9SDimitry Andric         kmp_flag_64<true, false> flag(
615e8d8bef9SDimitry Andric             &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
616e8d8bef9SDimitry Andric         if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
6170b57cec5SDimitry Andric           return true;
6180b57cec5SDimitry Andric       } else {
619e8d8bef9SDimitry Andric         kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
620e8d8bef9SDimitry Andric                            new_state);
6210b57cec5SDimitry Andric         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
6220b57cec5SDimitry Andric       }
6230b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
6240b57cec5SDimitry Andric       // Barrier imbalance - write min of the thread time and the other thread
6250b57cec5SDimitry Andric       // time to the thread.
6260b57cec5SDimitry Andric       if (__kmp_forkjoin_frames_mode == 2) {
6270b57cec5SDimitry Andric         this_thr->th.th_bar_min_time = KMP_MIN(
6280b57cec5SDimitry Andric             this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
6290b57cec5SDimitry Andric       }
6300b57cec5SDimitry Andric #endif
6310b57cec5SDimitry Andric       if (reduce) {
6320b57cec5SDimitry Andric         KA_TRACE(100,
6330b57cec5SDimitry Andric                  ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
6340b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
6350b57cec5SDimitry Andric                   team->t.t_id, i));
636480093f4SDimitry Andric         OMPT_REDUCTION_DECL(this_thr, gtid);
637480093f4SDimitry Andric         OMPT_REDUCTION_BEGIN;
6380b57cec5SDimitry Andric         (*reduce)(this_thr->th.th_local.reduce_data,
6390b57cec5SDimitry Andric                   other_threads[i]->th.th_local.reduce_data);
640480093f4SDimitry Andric         OMPT_REDUCTION_END;
6410b57cec5SDimitry Andric       }
6420b57cec5SDimitry Andric     }
6430b57cec5SDimitry Andric     // Don't have to worry about sleep bit here or atomic since team setting
6440b57cec5SDimitry Andric     team_bar->b_arrived = new_state;
6450b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
6460b57cec5SDimitry Andric                   "arrived(%p) = %llu\n",
6470b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
6480b57cec5SDimitry Andric                   new_state));
6490b57cec5SDimitry Andric   }
6500b57cec5SDimitry Andric   KA_TRACE(
6510b57cec5SDimitry Andric       20,
6520b57cec5SDimitry Andric       ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
6530b57cec5SDimitry Andric        gtid, team->t.t_id, tid, bt));
6540b57cec5SDimitry Andric   return false;
6550b57cec5SDimitry Andric }
6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric template <bool cancellable = false>
6580b57cec5SDimitry Andric static bool __kmp_linear_barrier_release_template(
6590b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
6600b57cec5SDimitry Andric     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
6610b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
6620b57cec5SDimitry Andric   kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
6630b57cec5SDimitry Andric   kmp_team_t *team;
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric   if (KMP_MASTER_TID(tid)) {
6660b57cec5SDimitry Andric     unsigned int i;
6670b57cec5SDimitry Andric     kmp_uint32 nproc = this_thr->th.th_team_nproc;
6680b57cec5SDimitry Andric     kmp_info_t **other_threads;
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric     team = __kmp_threads[gtid]->th.th_team;
6710b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(team != NULL);
6720b57cec5SDimitry Andric     other_threads = team->t.t_threads;
6730b57cec5SDimitry Andric 
674fe6060f1SDimitry Andric     KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "
6750b57cec5SDimitry Andric                   "barrier type %d\n",
6760b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, bt));
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric     if (nproc > 1) {
6790b57cec5SDimitry Andric #if KMP_BARRIER_ICV_PUSH
6800b57cec5SDimitry Andric       {
6810b57cec5SDimitry Andric         KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
6820b57cec5SDimitry Andric         if (propagate_icvs) {
6830b57cec5SDimitry Andric           ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
6840b57cec5SDimitry Andric           for (i = 1; i < nproc; ++i) {
6850b57cec5SDimitry Andric             __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
6860b57cec5SDimitry Andric                                      team, i, FALSE);
6870b57cec5SDimitry Andric             ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
6880b57cec5SDimitry Andric                            &team->t.t_implicit_task_taskdata[0].td_icvs);
6890b57cec5SDimitry Andric           }
6900b57cec5SDimitry Andric           ngo_sync();
6910b57cec5SDimitry Andric         }
6920b57cec5SDimitry Andric       }
6930b57cec5SDimitry Andric #endif // KMP_BARRIER_ICV_PUSH
6940b57cec5SDimitry Andric 
6950b57cec5SDimitry Andric       // Now, release all of the worker threads
6960b57cec5SDimitry Andric       for (i = 1; i < nproc; ++i) {
6970b57cec5SDimitry Andric #if KMP_CACHE_MANAGE
6980b57cec5SDimitry Andric         // Prefetch next thread's go flag
6990b57cec5SDimitry Andric         if (i + 1 < nproc)
7000b57cec5SDimitry Andric           KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
7010b57cec5SDimitry Andric #endif /* KMP_CACHE_MANAGE */
7020b57cec5SDimitry Andric         KA_TRACE(
7030b57cec5SDimitry Andric             20,
7040b57cec5SDimitry Andric             ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
7050b57cec5SDimitry Andric              "go(%p): %u => %u\n",
7060b57cec5SDimitry Andric              gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
7070b57cec5SDimitry Andric              team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
7080b57cec5SDimitry Andric              other_threads[i]->th.th_bar[bt].bb.b_go,
7090b57cec5SDimitry Andric              other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
710e8d8bef9SDimitry Andric         kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
7110b57cec5SDimitry Andric                            other_threads[i]);
7120b57cec5SDimitry Andric         flag.release();
7130b57cec5SDimitry Andric       }
7140b57cec5SDimitry Andric     }
715fe6060f1SDimitry Andric   } else { // Wait for the PRIMARY thread to release us
7160b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
7170b57cec5SDimitry Andric                   gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
7180b57cec5SDimitry Andric     if (cancellable) {
719e8d8bef9SDimitry Andric       kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
720e8d8bef9SDimitry Andric       if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
7210b57cec5SDimitry Andric         return true;
7220b57cec5SDimitry Andric     } else {
723e8d8bef9SDimitry Andric       kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
7240b57cec5SDimitry Andric       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
7250b57cec5SDimitry Andric     }
7260b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
7270b57cec5SDimitry Andric     if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
7280b57cec5SDimitry Andric       // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
7290b57cec5SDimitry Andric       // disabled)
7300b57cec5SDimitry Andric       itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
7310b57cec5SDimitry Andric       // Cancel wait on previous parallel region...
7320b57cec5SDimitry Andric       __kmp_itt_task_starting(itt_sync_obj);
7330b57cec5SDimitry Andric 
7340b57cec5SDimitry Andric       if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
7350b57cec5SDimitry Andric         return false;
7360b57cec5SDimitry Andric 
7370b57cec5SDimitry Andric       itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
7380b57cec5SDimitry Andric       if (itt_sync_obj != NULL)
7390b57cec5SDimitry Andric         // Call prepare as early as possible for "new" barrier
7400b57cec5SDimitry Andric         __kmp_itt_task_finished(itt_sync_obj);
7410b57cec5SDimitry Andric     } else
7420b57cec5SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
7430b57cec5SDimitry Andric         // Early exit for reaping threads releasing forkjoin barrier
7440b57cec5SDimitry Andric         if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
7450b57cec5SDimitry Andric       return false;
7460b57cec5SDimitry Andric // The worker thread may now assume that the team is valid.
7470b57cec5SDimitry Andric #ifdef KMP_DEBUG
7480b57cec5SDimitry Andric     tid = __kmp_tid_from_gtid(gtid);
7490b57cec5SDimitry Andric     team = __kmp_threads[gtid]->th.th_team;
7500b57cec5SDimitry Andric #endif
7510b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(team != NULL);
7520b57cec5SDimitry Andric     TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
7530b57cec5SDimitry Andric     KA_TRACE(20,
7540b57cec5SDimitry Andric              ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
7550b57cec5SDimitry Andric               gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
7560b57cec5SDimitry Andric     KMP_MB(); // Flush all pending memory write invalidates.
7570b57cec5SDimitry Andric   }
7580b57cec5SDimitry Andric   KA_TRACE(
7590b57cec5SDimitry Andric       20,
7600b57cec5SDimitry Andric       ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
7610b57cec5SDimitry Andric        gtid, team->t.t_id, tid, bt));
7620b57cec5SDimitry Andric   return false;
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric 
7650b57cec5SDimitry Andric static void __kmp_linear_barrier_gather(
7660b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
7670b57cec5SDimitry Andric     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
7680b57cec5SDimitry Andric   __kmp_linear_barrier_gather_template<false>(
7690b57cec5SDimitry Andric       bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
7700b57cec5SDimitry Andric }
7710b57cec5SDimitry Andric 
7720b57cec5SDimitry Andric static bool __kmp_linear_barrier_gather_cancellable(
7730b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
7740b57cec5SDimitry Andric     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
7750b57cec5SDimitry Andric   return __kmp_linear_barrier_gather_template<true>(
7760b57cec5SDimitry Andric       bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
7770b57cec5SDimitry Andric }
7780b57cec5SDimitry Andric 
7790b57cec5SDimitry Andric static void __kmp_linear_barrier_release(
7800b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
7810b57cec5SDimitry Andric     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
7820b57cec5SDimitry Andric   __kmp_linear_barrier_release_template<false>(
7830b57cec5SDimitry Andric       bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
7840b57cec5SDimitry Andric }
7850b57cec5SDimitry Andric 
7860b57cec5SDimitry Andric static bool __kmp_linear_barrier_release_cancellable(
7870b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
7880b57cec5SDimitry Andric     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
7890b57cec5SDimitry Andric   return __kmp_linear_barrier_release_template<true>(
7900b57cec5SDimitry Andric       bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
7910b57cec5SDimitry Andric }
7920b57cec5SDimitry Andric 
7930b57cec5SDimitry Andric // Tree barrier
794fe6060f1SDimitry Andric static void __kmp_tree_barrier_gather(
795fe6060f1SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
796fe6060f1SDimitry Andric     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
7970b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
7980b57cec5SDimitry Andric   kmp_team_t *team = this_thr->th.th_team;
7990b57cec5SDimitry Andric   kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
8000b57cec5SDimitry Andric   kmp_info_t **other_threads = team->t.t_threads;
8010b57cec5SDimitry Andric   kmp_uint32 nproc = this_thr->th.th_team_nproc;
8020b57cec5SDimitry Andric   kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
8030b57cec5SDimitry Andric   kmp_uint32 branch_factor = 1 << branch_bits;
8040b57cec5SDimitry Andric   kmp_uint32 child;
8050b57cec5SDimitry Andric   kmp_uint32 child_tid;
806fe6060f1SDimitry Andric   kmp_uint64 new_state = 0;
8070b57cec5SDimitry Andric 
8080b57cec5SDimitry Andric   KA_TRACE(
8090b57cec5SDimitry Andric       20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
8100b57cec5SDimitry Andric            gtid, team->t.t_id, tid, bt));
8110b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
8120b57cec5SDimitry Andric 
8130b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
8140b57cec5SDimitry Andric   // Barrier imbalance - save arrive time to the thread
8150b57cec5SDimitry Andric   if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
8160b57cec5SDimitry Andric     this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
8170b57cec5SDimitry Andric         __itt_get_timestamp();
8180b57cec5SDimitry Andric   }
8190b57cec5SDimitry Andric #endif
8200b57cec5SDimitry Andric   // Perform tree gather to wait until all threads have arrived; reduce any
8210b57cec5SDimitry Andric   // required data as we go
8220b57cec5SDimitry Andric   child_tid = (tid << branch_bits) + 1;
8230b57cec5SDimitry Andric   if (child_tid < nproc) {
8240b57cec5SDimitry Andric     // Parent threads wait for all their children to arrive
8250b57cec5SDimitry Andric     new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
8260b57cec5SDimitry Andric     child = 1;
8270b57cec5SDimitry Andric     do {
8280b57cec5SDimitry Andric       kmp_info_t *child_thr = other_threads[child_tid];
8290b57cec5SDimitry Andric       kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
8300b57cec5SDimitry Andric #if KMP_CACHE_MANAGE
8310b57cec5SDimitry Andric       // Prefetch next thread's arrived count
8320b57cec5SDimitry Andric       if (child + 1 <= branch_factor && child_tid + 1 < nproc)
8330b57cec5SDimitry Andric         KMP_CACHE_PREFETCH(
8340b57cec5SDimitry Andric             &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
8350b57cec5SDimitry Andric #endif /* KMP_CACHE_MANAGE */
8360b57cec5SDimitry Andric       KA_TRACE(20,
8370b57cec5SDimitry Andric                ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
8380b57cec5SDimitry Andric                 "arrived(%p) == %llu\n",
8390b57cec5SDimitry Andric                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
8400b57cec5SDimitry Andric                 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
8410b57cec5SDimitry Andric       // Wait for child to arrive
842e8d8bef9SDimitry Andric       kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
8430b57cec5SDimitry Andric       flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
8440b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
8450b57cec5SDimitry Andric       // Barrier imbalance - write min of the thread time and a child time to
8460b57cec5SDimitry Andric       // the thread.
8470b57cec5SDimitry Andric       if (__kmp_forkjoin_frames_mode == 2) {
8480b57cec5SDimitry Andric         this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
8490b57cec5SDimitry Andric                                                child_thr->th.th_bar_min_time);
8500b57cec5SDimitry Andric       }
8510b57cec5SDimitry Andric #endif
8520b57cec5SDimitry Andric       if (reduce) {
8530b57cec5SDimitry Andric         KA_TRACE(100,
8540b57cec5SDimitry Andric                  ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
8550b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
8560b57cec5SDimitry Andric                   team->t.t_id, child_tid));
857480093f4SDimitry Andric         OMPT_REDUCTION_DECL(this_thr, gtid);
858480093f4SDimitry Andric         OMPT_REDUCTION_BEGIN;
8590b57cec5SDimitry Andric         (*reduce)(this_thr->th.th_local.reduce_data,
8600b57cec5SDimitry Andric                   child_thr->th.th_local.reduce_data);
861480093f4SDimitry Andric         OMPT_REDUCTION_END;
8620b57cec5SDimitry Andric       }
8630b57cec5SDimitry Andric       child++;
8640b57cec5SDimitry Andric       child_tid++;
8650b57cec5SDimitry Andric     } while (child <= branch_factor && child_tid < nproc);
8660b57cec5SDimitry Andric   }
8670b57cec5SDimitry Andric 
8680b57cec5SDimitry Andric   if (!KMP_MASTER_TID(tid)) { // Worker threads
8690b57cec5SDimitry Andric     kmp_int32 parent_tid = (tid - 1) >> branch_bits;
8700b57cec5SDimitry Andric 
8710b57cec5SDimitry Andric     KA_TRACE(20,
8720b57cec5SDimitry Andric              ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
8730b57cec5SDimitry Andric               "arrived(%p): %llu => %llu\n",
8740b57cec5SDimitry Andric               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
8750b57cec5SDimitry Andric               team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
8760b57cec5SDimitry Andric               thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
8770b57cec5SDimitry Andric 
8780b57cec5SDimitry Andric     // Mark arrival to parent thread
8790b57cec5SDimitry Andric     /* After performing this write, a worker thread may not assume that the team
880fe6060f1SDimitry Andric        is valid any more - it could be deallocated by the primary thread at any
8810b57cec5SDimitry Andric        time.  */
882e8d8bef9SDimitry Andric     kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
8830b57cec5SDimitry Andric     flag.release();
8840b57cec5SDimitry Andric   } else {
885fe6060f1SDimitry Andric     // Need to update the team arrived pointer if we are the primary thread
8860b57cec5SDimitry Andric     if (nproc > 1) // New value was already computed above
8870b57cec5SDimitry Andric       team->t.t_bar[bt].b_arrived = new_state;
8880b57cec5SDimitry Andric     else
8890b57cec5SDimitry Andric       team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
8900b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
8910b57cec5SDimitry Andric                   "arrived(%p) = %llu\n",
8920b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, team->t.t_id,
8930b57cec5SDimitry Andric                   &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
8940b57cec5SDimitry Andric   }
8950b57cec5SDimitry Andric   KA_TRACE(20,
8960b57cec5SDimitry Andric            ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
8970b57cec5SDimitry Andric             gtid, team->t.t_id, tid, bt));
8980b57cec5SDimitry Andric }
8990b57cec5SDimitry Andric 
9000b57cec5SDimitry Andric static void __kmp_tree_barrier_release(
9010b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
9020b57cec5SDimitry Andric     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
9030b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
9040b57cec5SDimitry Andric   kmp_team_t *team;
9050b57cec5SDimitry Andric   kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
9060b57cec5SDimitry Andric   kmp_uint32 nproc;
9070b57cec5SDimitry Andric   kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
9080b57cec5SDimitry Andric   kmp_uint32 branch_factor = 1 << branch_bits;
9090b57cec5SDimitry Andric   kmp_uint32 child;
9100b57cec5SDimitry Andric   kmp_uint32 child_tid;
9110b57cec5SDimitry Andric 
9120b57cec5SDimitry Andric   // Perform a tree release for all of the threads that have been gathered
9130b57cec5SDimitry Andric   if (!KMP_MASTER_TID(
9140b57cec5SDimitry Andric           tid)) { // Handle fork barrier workers who aren't part of a team yet
9150b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
9160b57cec5SDimitry Andric                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
9170b57cec5SDimitry Andric     // Wait for parent thread to release us
918e8d8bef9SDimitry Andric     kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
9190b57cec5SDimitry Andric     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
9200b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
9210b57cec5SDimitry Andric     if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
9220b57cec5SDimitry Andric       // In fork barrier where we could not get the object reliably (or
9230b57cec5SDimitry Andric       // ITTNOTIFY is disabled)
9240b57cec5SDimitry Andric       itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
9250b57cec5SDimitry Andric       // Cancel wait on previous parallel region...
9260b57cec5SDimitry Andric       __kmp_itt_task_starting(itt_sync_obj);
9270b57cec5SDimitry Andric 
9280b57cec5SDimitry Andric       if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
9290b57cec5SDimitry Andric         return;
9300b57cec5SDimitry Andric 
9310b57cec5SDimitry Andric       itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
9320b57cec5SDimitry Andric       if (itt_sync_obj != NULL)
9330b57cec5SDimitry Andric         // Call prepare as early as possible for "new" barrier
9340b57cec5SDimitry Andric         __kmp_itt_task_finished(itt_sync_obj);
9350b57cec5SDimitry Andric     } else
9360b57cec5SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
9370b57cec5SDimitry Andric         // Early exit for reaping threads releasing forkjoin barrier
9380b57cec5SDimitry Andric         if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
9390b57cec5SDimitry Andric       return;
9400b57cec5SDimitry Andric 
9410b57cec5SDimitry Andric     // The worker thread may now assume that the team is valid.
9420b57cec5SDimitry Andric     team = __kmp_threads[gtid]->th.th_team;
9430b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(team != NULL);
9440b57cec5SDimitry Andric     tid = __kmp_tid_from_gtid(gtid);
9450b57cec5SDimitry Andric 
9460b57cec5SDimitry Andric     TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
9470b57cec5SDimitry Andric     KA_TRACE(20,
9480b57cec5SDimitry Andric              ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
9490b57cec5SDimitry Andric               team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
9500b57cec5SDimitry Andric     KMP_MB(); // Flush all pending memory write invalidates.
9510b57cec5SDimitry Andric   } else {
9520b57cec5SDimitry Andric     team = __kmp_threads[gtid]->th.th_team;
9530b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(team != NULL);
954fe6060f1SDimitry Andric     KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "
9550b57cec5SDimitry Andric                   "barrier type %d\n",
9560b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, bt));
9570b57cec5SDimitry Andric   }
9580b57cec5SDimitry Andric   nproc = this_thr->th.th_team_nproc;
9590b57cec5SDimitry Andric   child_tid = (tid << branch_bits) + 1;
9600b57cec5SDimitry Andric 
9610b57cec5SDimitry Andric   if (child_tid < nproc) {
9620b57cec5SDimitry Andric     kmp_info_t **other_threads = team->t.t_threads;
9630b57cec5SDimitry Andric     child = 1;
9640b57cec5SDimitry Andric     // Parent threads release all their children
9650b57cec5SDimitry Andric     do {
9660b57cec5SDimitry Andric       kmp_info_t *child_thr = other_threads[child_tid];
9670b57cec5SDimitry Andric       kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
9680b57cec5SDimitry Andric #if KMP_CACHE_MANAGE
9690b57cec5SDimitry Andric       // Prefetch next thread's go count
9700b57cec5SDimitry Andric       if (child + 1 <= branch_factor && child_tid + 1 < nproc)
9710b57cec5SDimitry Andric         KMP_CACHE_PREFETCH(
9720b57cec5SDimitry Andric             &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
9730b57cec5SDimitry Andric #endif /* KMP_CACHE_MANAGE */
9740b57cec5SDimitry Andric 
9750b57cec5SDimitry Andric #if KMP_BARRIER_ICV_PUSH
9760b57cec5SDimitry Andric       {
9770b57cec5SDimitry Andric         KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
9780b57cec5SDimitry Andric         if (propagate_icvs) {
9790b57cec5SDimitry Andric           __kmp_init_implicit_task(team->t.t_ident,
9800b57cec5SDimitry Andric                                    team->t.t_threads[child_tid], team,
9810b57cec5SDimitry Andric                                    child_tid, FALSE);
9820b57cec5SDimitry Andric           copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
9830b57cec5SDimitry Andric                     &team->t.t_implicit_task_taskdata[0].td_icvs);
9840b57cec5SDimitry Andric         }
9850b57cec5SDimitry Andric       }
9860b57cec5SDimitry Andric #endif // KMP_BARRIER_ICV_PUSH
9870b57cec5SDimitry Andric       KA_TRACE(20,
9880b57cec5SDimitry Andric                ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
9890b57cec5SDimitry Andric                 "go(%p): %u => %u\n",
9900b57cec5SDimitry Andric                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
9910b57cec5SDimitry Andric                 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
9920b57cec5SDimitry Andric                 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
9930b57cec5SDimitry Andric       // Release child from barrier
994e8d8bef9SDimitry Andric       kmp_flag_64<> flag(&child_bar->b_go, child_thr);
9950b57cec5SDimitry Andric       flag.release();
9960b57cec5SDimitry Andric       child++;
9970b57cec5SDimitry Andric       child_tid++;
9980b57cec5SDimitry Andric     } while (child <= branch_factor && child_tid < nproc);
9990b57cec5SDimitry Andric   }
10000b57cec5SDimitry Andric   KA_TRACE(
10010b57cec5SDimitry Andric       20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
10020b57cec5SDimitry Andric            gtid, team->t.t_id, tid, bt));
10030b57cec5SDimitry Andric }
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric // Hyper Barrier
// Gather phase of the hypercube-embedded tree ("hyper") barrier.
//
// Each thread inspects successive branch_bits-wide digits of its team-local
// tid, least-significant first. At a given level, a thread whose digit is
// nonzero signals its parent's b_arrived flag and leaves the loop; a thread
// whose digit is zero waits for its (up to branch_factor - 1) children at
// that level, optionally combining their reduction data, then moves up a
// level. Only the primary thread (tid 0) survives every level; it finally
// records the new arrived state on the team structure.
//
//   bt           which barrier instance (plain / forkjoin / reduction)
//   this_thr     descriptor of the calling thread
//   gtid, tid    global and team-local ids of the calling thread
//   reduce       optional combiner invoked on children's reduce_data
//   itt_sync_obj ITT tracing object (present only under USE_ITT_BUILD)
static void __kmp_hyper_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  // Sentinel: the target b_arrived value is computed lazily, only by threads
  // that actually wait for children (see the parent branch below).
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go.  */
  kmp_flag_64<> p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    // Nonzero digit at this level: this thread is a child here. Signal the
    // parent (the tid with this and all lower digits cleared) and stop.
    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KMP_MB(); // Synchronize parent and child threads.
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread
      /* After performing this write (in the last iteration of the enclosing for
         loop), a worker thread may not assume that the team is valid any more
         - it could be deallocated by the primary thread at any time.  */
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    // Children at this level live at tid + (1<<level), tid + 2*(1<<level), ...
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      KMP_MB(); // Synchronize parent and child threads.
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the primary thread
    // new_state is still the sentinel if the team had no children to wait on.
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
11220b57cec5SDimitry Andric 
11230b57cec5SDimitry Andric // The reverse versions seem to beat the forward versions overall
11240b57cec5SDimitry Andric #define KMP_REVERSE_HYPER_BAR
// Release phase of the hypercube-embedded tree ("hyper") barrier.
//
// The primary thread enters directly and starts releasing children; a worker
// first blocks on its own b_go flag until its parent releases it, then in
// turn releases its own children. With KMP_REVERSE_HYPER_BAR (the default)
// children are released from the highest level/tid downward, i.e. in the
// reverse order of the gather.
//
//   bt             which barrier instance (plain / forkjoin / reduction)
//   this_thr       descriptor of the calling thread
//   gtid, tid      global and team-local ids (tid is recomputed for workers
//                  after wake-up, since a fork-barrier worker may not have a
//                  valid team on entry)
//   propagate_icvs nonzero => push ICVs down the tree via th_fixed_icvs
//   itt_sync_obj   ITT tracing object (present only under USE_ITT_BUILD)
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have
     been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
     are released in the reverse order of the corresponding gather, otherwise
     threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // primary thread
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // primary already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      // Runtime is shutting down; nothing more to release.
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    // Re-arm our own b_go flag for the next barrier before releasing children.
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
        child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be
      // notified
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      // The conservative reverse-order bounds can name a nonexistent child.
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        kmp_flag_64<> flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
12790b57cec5SDimitry Andric 
12800b57cec5SDimitry Andric // Hierarchical Barrier
12810b57cec5SDimitry Andric 
12820b57cec5SDimitry Andric // Initialize thread barrier data
12830b57cec5SDimitry Andric /* Initializes/re-initializes the hierarchical barrier data stored on a thread.
12840b57cec5SDimitry Andric    Performs the minimum amount of initialization required based on how the team
12850b57cec5SDimitry Andric    has changed. Returns true if leaf children will require both on-core and
12860b57cec5SDimitry Andric    traditional wake-up mechanisms. For example, if the team size increases,
12870b57cec5SDimitry Andric    threads already in the team will respond to on-core wakeup on their parent
12880b57cec5SDimitry Andric    thread, but threads newly added to the team will only be listening on the
12890b57cec5SDimitry Andric    their local b_go. */
12900b57cec5SDimitry Andric static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
12910b57cec5SDimitry Andric                                                    kmp_bstate_t *thr_bar,
12920b57cec5SDimitry Andric                                                    kmp_uint32 nproc, int gtid,
12930b57cec5SDimitry Andric                                                    int tid, kmp_team_t *team) {
12940b57cec5SDimitry Andric   // Checks to determine if (re-)initialization is needed
12950b57cec5SDimitry Andric   bool uninitialized = thr_bar->team == NULL;
12960b57cec5SDimitry Andric   bool team_changed = team != thr_bar->team;
12970b57cec5SDimitry Andric   bool team_sz_changed = nproc != thr_bar->nproc;
12980b57cec5SDimitry Andric   bool tid_changed = tid != thr_bar->old_tid;
12990b57cec5SDimitry Andric   bool retval = false;
13000b57cec5SDimitry Andric 
13010b57cec5SDimitry Andric   if (uninitialized || team_sz_changed) {
13020b57cec5SDimitry Andric     __kmp_get_hierarchy(nproc, thr_bar);
13030b57cec5SDimitry Andric   }
13040b57cec5SDimitry Andric 
13050b57cec5SDimitry Andric   if (uninitialized || team_sz_changed || tid_changed) {
1306fe6060f1SDimitry Andric     thr_bar->my_level = thr_bar->depth - 1; // default for primary thread
1307fe6060f1SDimitry Andric     thr_bar->parent_tid = -1; // default for primary thread
1308fe6060f1SDimitry Andric     if (!KMP_MASTER_TID(tid)) {
1309fe6060f1SDimitry Andric       // if not primary thread, find parent thread in hierarchy
13100b57cec5SDimitry Andric       kmp_uint32 d = 0;
13110b57cec5SDimitry Andric       while (d < thr_bar->depth) { // find parent based on level of thread in
13120b57cec5SDimitry Andric         // hierarchy, and note level
13130b57cec5SDimitry Andric         kmp_uint32 rem;
1314fe6060f1SDimitry Andric         if (d == thr_bar->depth - 2) { // reached level right below the primary
13150b57cec5SDimitry Andric           thr_bar->parent_tid = 0;
13160b57cec5SDimitry Andric           thr_bar->my_level = d;
13170b57cec5SDimitry Andric           break;
1318e8d8bef9SDimitry Andric         } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
1319e8d8bef9SDimitry Andric           // TODO: can we make the above op faster?
13200b57cec5SDimitry Andric           // thread is not a subtree root at next level, so this is max
13210b57cec5SDimitry Andric           thr_bar->parent_tid = tid - rem;
13220b57cec5SDimitry Andric           thr_bar->my_level = d;
13230b57cec5SDimitry Andric           break;
13240b57cec5SDimitry Andric         }
13250b57cec5SDimitry Andric         ++d;
13260b57cec5SDimitry Andric       }
13270b57cec5SDimitry Andric     }
1328e8d8bef9SDimitry Andric     __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
1329e8d8bef9SDimitry Andric                             (thr_bar->skip_per_level[thr_bar->my_level])),
1330e8d8bef9SDimitry Andric                        &(thr_bar->offset));
13310b57cec5SDimitry Andric     thr_bar->old_tid = tid;
13320b57cec5SDimitry Andric     thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
13330b57cec5SDimitry Andric     thr_bar->team = team;
13340b57cec5SDimitry Andric     thr_bar->parent_bar =
13350b57cec5SDimitry Andric         &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
13360b57cec5SDimitry Andric   }
13370b57cec5SDimitry Andric   if (uninitialized || team_changed || tid_changed) {
13380b57cec5SDimitry Andric     thr_bar->team = team;
13390b57cec5SDimitry Andric     thr_bar->parent_bar =
13400b57cec5SDimitry Andric         &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
13410b57cec5SDimitry Andric     retval = true;
13420b57cec5SDimitry Andric   }
13430b57cec5SDimitry Andric   if (uninitialized || team_sz_changed || tid_changed) {
13440b57cec5SDimitry Andric     thr_bar->nproc = nproc;
13450b57cec5SDimitry Andric     thr_bar->leaf_kids = thr_bar->base_leaf_kids;
13460b57cec5SDimitry Andric     if (thr_bar->my_level == 0)
13470b57cec5SDimitry Andric       thr_bar->leaf_kids = 0;
13480b57cec5SDimitry Andric     if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
1349e8d8bef9SDimitry Andric       __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
13500b57cec5SDimitry Andric     thr_bar->leaf_state = 0;
13510b57cec5SDimitry Andric     for (int i = 0; i < thr_bar->leaf_kids; ++i)
13520b57cec5SDimitry Andric       ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
13530b57cec5SDimitry Andric   }
13540b57cec5SDimitry Andric   return retval;
13550b57cec5SDimitry Andric }
13560b57cec5SDimitry Andric 
13570b57cec5SDimitry Andric static void __kmp_hierarchical_barrier_gather(
13580b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
13590b57cec5SDimitry Andric     void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
13600b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
13610b57cec5SDimitry Andric   kmp_team_t *team = this_thr->th.th_team;
13620b57cec5SDimitry Andric   kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
13630b57cec5SDimitry Andric   kmp_uint32 nproc = this_thr->th.th_team_nproc;
13640b57cec5SDimitry Andric   kmp_info_t **other_threads = team->t.t_threads;
1365fe6060f1SDimitry Andric   kmp_uint64 new_state = 0;
13660b57cec5SDimitry Andric 
13670b57cec5SDimitry Andric   int level = team->t.t_level;
13680b57cec5SDimitry Andric   if (other_threads[0]
13690b57cec5SDimitry Andric           ->th.th_teams_microtask) // are we inside the teams construct?
13700b57cec5SDimitry Andric     if (this_thr->th.th_teams_size.nteams > 1)
13710b57cec5SDimitry Andric       ++level; // level was not increased in teams construct for team_of_masters
13720b57cec5SDimitry Andric   if (level == 1)
13730b57cec5SDimitry Andric     thr_bar->use_oncore_barrier = 1;
13740b57cec5SDimitry Andric   else
13750b57cec5SDimitry Andric     thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
13760b57cec5SDimitry Andric 
13770b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
13780b57cec5SDimitry Andric                 "barrier type %d\n",
13790b57cec5SDimitry Andric                 gtid, team->t.t_id, tid, bt));
13800b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
13810b57cec5SDimitry Andric 
13820b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
13830b57cec5SDimitry Andric   // Barrier imbalance - save arrive time to the thread
13840b57cec5SDimitry Andric   if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
13850b57cec5SDimitry Andric     this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
13860b57cec5SDimitry Andric   }
13870b57cec5SDimitry Andric #endif
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric   (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
13900b57cec5SDimitry Andric                                                team);
13910b57cec5SDimitry Andric 
13920b57cec5SDimitry Andric   if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
13930b57cec5SDimitry Andric     kmp_int32 child_tid;
13940b57cec5SDimitry Andric     new_state =
13950b57cec5SDimitry Andric         (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
13960b57cec5SDimitry Andric     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
13970b57cec5SDimitry Andric         thr_bar->use_oncore_barrier) {
13980b57cec5SDimitry Andric       if (thr_bar->leaf_kids) {
13990b57cec5SDimitry Andric         // First, wait for leaf children to check-in on my b_arrived flag
14000b57cec5SDimitry Andric         kmp_uint64 leaf_state =
14010b57cec5SDimitry Andric             KMP_MASTER_TID(tid)
14020b57cec5SDimitry Andric                 ? thr_bar->b_arrived | thr_bar->leaf_state
14030b57cec5SDimitry Andric                 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
14040b57cec5SDimitry Andric         KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
14050b57cec5SDimitry Andric                       "for leaf kids\n",
14060b57cec5SDimitry Andric                       gtid, team->t.t_id, tid));
1407e8d8bef9SDimitry Andric         kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
14080b57cec5SDimitry Andric         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
14090b57cec5SDimitry Andric         if (reduce) {
1410480093f4SDimitry Andric           OMPT_REDUCTION_DECL(this_thr, gtid);
1411480093f4SDimitry Andric           OMPT_REDUCTION_BEGIN;
14120b57cec5SDimitry Andric           for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
14130b57cec5SDimitry Andric                ++child_tid) {
14140b57cec5SDimitry Andric             KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
14150b57cec5SDimitry Andric                            "T#%d(%d:%d)\n",
14160b57cec5SDimitry Andric                            gtid, team->t.t_id, tid,
14170b57cec5SDimitry Andric                            __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
14180b57cec5SDimitry Andric                            child_tid));
14190b57cec5SDimitry Andric             (*reduce)(this_thr->th.th_local.reduce_data,
14200b57cec5SDimitry Andric                       other_threads[child_tid]->th.th_local.reduce_data);
14210b57cec5SDimitry Andric           }
1422480093f4SDimitry Andric           OMPT_REDUCTION_END;
14230b57cec5SDimitry Andric         }
14240b57cec5SDimitry Andric         // clear leaf_state bits
14250b57cec5SDimitry Andric         KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
14260b57cec5SDimitry Andric       }
14270b57cec5SDimitry Andric       // Next, wait for higher level children on each child's b_arrived flag
14280b57cec5SDimitry Andric       for (kmp_uint32 d = 1; d < thr_bar->my_level;
14290b57cec5SDimitry Andric            ++d) { // gather lowest level threads first, but skip 0
14300b57cec5SDimitry Andric         kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
14310b57cec5SDimitry Andric                    skip = thr_bar->skip_per_level[d];
14320b57cec5SDimitry Andric         if (last > nproc)
14330b57cec5SDimitry Andric           last = nproc;
14340b57cec5SDimitry Andric         for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
14350b57cec5SDimitry Andric           kmp_info_t *child_thr = other_threads[child_tid];
14360b57cec5SDimitry Andric           kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
14370b57cec5SDimitry Andric           KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
14380b57cec5SDimitry Andric                         "T#%d(%d:%d) "
14390b57cec5SDimitry Andric                         "arrived(%p) == %llu\n",
14400b57cec5SDimitry Andric                         gtid, team->t.t_id, tid,
14410b57cec5SDimitry Andric                         __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
14420b57cec5SDimitry Andric                         child_tid, &child_bar->b_arrived, new_state));
1443e8d8bef9SDimitry Andric           kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
14440b57cec5SDimitry Andric           flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
14450b57cec5SDimitry Andric           if (reduce) {
14460b57cec5SDimitry Andric             KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
14470b57cec5SDimitry Andric                            "T#%d(%d:%d)\n",
14480b57cec5SDimitry Andric                            gtid, team->t.t_id, tid,
14490b57cec5SDimitry Andric                            __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
14500b57cec5SDimitry Andric                            child_tid));
14510b57cec5SDimitry Andric             (*reduce)(this_thr->th.th_local.reduce_data,
14520b57cec5SDimitry Andric                       child_thr->th.th_local.reduce_data);
14530b57cec5SDimitry Andric           }
14540b57cec5SDimitry Andric         }
14550b57cec5SDimitry Andric       }
14560b57cec5SDimitry Andric     } else { // Blocktime is not infinite
14570b57cec5SDimitry Andric       for (kmp_uint32 d = 0; d < thr_bar->my_level;
14580b57cec5SDimitry Andric            ++d) { // Gather lowest level threads first
14590b57cec5SDimitry Andric         kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
14600b57cec5SDimitry Andric                    skip = thr_bar->skip_per_level[d];
14610b57cec5SDimitry Andric         if (last > nproc)
14620b57cec5SDimitry Andric           last = nproc;
14630b57cec5SDimitry Andric         for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
14640b57cec5SDimitry Andric           kmp_info_t *child_thr = other_threads[child_tid];
14650b57cec5SDimitry Andric           kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
14660b57cec5SDimitry Andric           KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
14670b57cec5SDimitry Andric                         "T#%d(%d:%d) "
14680b57cec5SDimitry Andric                         "arrived(%p) == %llu\n",
14690b57cec5SDimitry Andric                         gtid, team->t.t_id, tid,
14700b57cec5SDimitry Andric                         __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
14710b57cec5SDimitry Andric                         child_tid, &child_bar->b_arrived, new_state));
1472e8d8bef9SDimitry Andric           kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
14730b57cec5SDimitry Andric           flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
14740b57cec5SDimitry Andric           if (reduce) {
14750b57cec5SDimitry Andric             KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
14760b57cec5SDimitry Andric                            "T#%d(%d:%d)\n",
14770b57cec5SDimitry Andric                            gtid, team->t.t_id, tid,
14780b57cec5SDimitry Andric                            __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
14790b57cec5SDimitry Andric                            child_tid));
14800b57cec5SDimitry Andric             (*reduce)(this_thr->th.th_local.reduce_data,
14810b57cec5SDimitry Andric                       child_thr->th.th_local.reduce_data);
14820b57cec5SDimitry Andric           }
14830b57cec5SDimitry Andric         }
14840b57cec5SDimitry Andric       }
14850b57cec5SDimitry Andric     }
14860b57cec5SDimitry Andric   }
1487fe6060f1SDimitry Andric   // All subordinates are gathered; now release parent if not primary thread
14880b57cec5SDimitry Andric 
14890b57cec5SDimitry Andric   if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
14900b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
14910b57cec5SDimitry Andric                   " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
14920b57cec5SDimitry Andric                   gtid, team->t.t_id, tid,
14930b57cec5SDimitry Andric                   __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
14940b57cec5SDimitry Andric                   thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
14950b57cec5SDimitry Andric                   thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
14960b57cec5SDimitry Andric     /* Mark arrival to parent: After performing this write, a worker thread may
14970b57cec5SDimitry Andric        not assume that the team is valid any more - it could be deallocated by
1498fe6060f1SDimitry Andric        the primary thread at any time. */
14990b57cec5SDimitry Andric     if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
15000b57cec5SDimitry Andric         !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
15010b57cec5SDimitry Andric       // flag; release it
1502e8d8bef9SDimitry Andric       kmp_flag_64<> flag(&thr_bar->b_arrived,
1503e8d8bef9SDimitry Andric                          other_threads[thr_bar->parent_tid]);
15040b57cec5SDimitry Andric       flag.release();
15050b57cec5SDimitry Andric     } else {
15060b57cec5SDimitry Andric       // Leaf does special release on "offset" bits of parent's b_arrived flag
15070b57cec5SDimitry Andric       thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1508e8d8bef9SDimitry Andric       kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
1509e8d8bef9SDimitry Andric                            thr_bar->offset + 1);
15100b57cec5SDimitry Andric       flag.set_waiter(other_threads[thr_bar->parent_tid]);
15110b57cec5SDimitry Andric       flag.release();
15120b57cec5SDimitry Andric     }
1513fe6060f1SDimitry Andric   } else { // Primary thread needs to update the team's b_arrived value
15140b57cec5SDimitry Andric     team->t.t_bar[bt].b_arrived = new_state;
15150b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
15160b57cec5SDimitry Andric                   "arrived(%p) = %llu\n",
15170b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, team->t.t_id,
15180b57cec5SDimitry Andric                   &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
15190b57cec5SDimitry Andric   }
15200b57cec5SDimitry Andric   // Is the team access below unsafe or just technically invalid?
15210b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
15220b57cec5SDimitry Andric                 "barrier type %d\n",
15230b57cec5SDimitry Andric                 gtid, team->t.t_id, tid, bt));
15240b57cec5SDimitry Andric }
15250b57cec5SDimitry Andric 
15260b57cec5SDimitry Andric static void __kmp_hierarchical_barrier_release(
15270b57cec5SDimitry Andric     enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
15280b57cec5SDimitry Andric     int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
15290b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
15300b57cec5SDimitry Andric   kmp_team_t *team;
15310b57cec5SDimitry Andric   kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
15320b57cec5SDimitry Andric   kmp_uint32 nproc;
15330b57cec5SDimitry Andric   bool team_change = false; // indicates on-core barrier shouldn't be used
15340b57cec5SDimitry Andric 
15350b57cec5SDimitry Andric   if (KMP_MASTER_TID(tid)) {
15360b57cec5SDimitry Andric     team = __kmp_threads[gtid]->th.th_team;
15370b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(team != NULL);
1538fe6060f1SDimitry Andric     KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "
15390b57cec5SDimitry Andric                   "entered barrier type %d\n",
15400b57cec5SDimitry Andric                   gtid, team->t.t_id, tid, bt));
15410b57cec5SDimitry Andric   } else { // Worker threads
15420b57cec5SDimitry Andric     // Wait for parent thread to release me
15430b57cec5SDimitry Andric     if (!thr_bar->use_oncore_barrier ||
15440b57cec5SDimitry Andric         __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
15450b57cec5SDimitry Andric         thr_bar->team == NULL) {
15460b57cec5SDimitry Andric       // Use traditional method of waiting on my own b_go flag
15470b57cec5SDimitry Andric       thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
1548e8d8bef9SDimitry Andric       kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
15490b57cec5SDimitry Andric       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
15500b57cec5SDimitry Andric       TCW_8(thr_bar->b_go,
15510b57cec5SDimitry Andric             KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
15520b57cec5SDimitry Andric     } else { // Thread barrier data is initialized, this is a leaf, blocktime is
15530b57cec5SDimitry Andric       // infinite, not nested
15540b57cec5SDimitry Andric       // Wait on my "offset" bits on parent's b_go flag
15550b57cec5SDimitry Andric       thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
15560b57cec5SDimitry Andric       kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
1557e8d8bef9SDimitry Andric                            thr_bar->offset + 1, bt,
15580b57cec5SDimitry Andric                            this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
15590b57cec5SDimitry Andric       flag.wait(this_thr, TRUE);
15600b57cec5SDimitry Andric       if (thr_bar->wait_flag ==
15610b57cec5SDimitry Andric           KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
15620b57cec5SDimitry Andric         TCW_8(thr_bar->b_go,
15630b57cec5SDimitry Andric               KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
15640b57cec5SDimitry Andric       } else { // Reset my bits on parent's b_go flag
15650b57cec5SDimitry Andric         (RCAST(volatile char *,
1566e8d8bef9SDimitry Andric                &(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0;
15670b57cec5SDimitry Andric       }
15680b57cec5SDimitry Andric     }
15690b57cec5SDimitry Andric     thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
15700b57cec5SDimitry Andric     // Early exit for reaping threads releasing forkjoin barrier
15710b57cec5SDimitry Andric     if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
15720b57cec5SDimitry Andric       return;
15730b57cec5SDimitry Andric     // The worker thread may now assume that the team is valid.
15740b57cec5SDimitry Andric     team = __kmp_threads[gtid]->th.th_team;
15750b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(team != NULL);
15760b57cec5SDimitry Andric     tid = __kmp_tid_from_gtid(gtid);
15770b57cec5SDimitry Andric 
15780b57cec5SDimitry Andric     KA_TRACE(
15790b57cec5SDimitry Andric         20,
15800b57cec5SDimitry Andric         ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
15810b57cec5SDimitry Andric          gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
15820b57cec5SDimitry Andric     KMP_MB(); // Flush all pending memory write invalidates.
15830b57cec5SDimitry Andric   }
15840b57cec5SDimitry Andric 
15850b57cec5SDimitry Andric   nproc = this_thr->th.th_team_nproc;
15860b57cec5SDimitry Andric   int level = team->t.t_level;
15870b57cec5SDimitry Andric   if (team->t.t_threads[0]
15880b57cec5SDimitry Andric           ->th.th_teams_microtask) { // are we inside the teams construct?
15890b57cec5SDimitry Andric     if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
15900b57cec5SDimitry Andric         this_thr->th.th_teams_level == level)
15910b57cec5SDimitry Andric       ++level; // level was not increased in teams construct for team_of_workers
15920b57cec5SDimitry Andric     if (this_thr->th.th_teams_size.nteams > 1)
15930b57cec5SDimitry Andric       ++level; // level was not increased in teams construct for team_of_masters
15940b57cec5SDimitry Andric   }
15950b57cec5SDimitry Andric   if (level == 1)
15960b57cec5SDimitry Andric     thr_bar->use_oncore_barrier = 1;
15970b57cec5SDimitry Andric   else
15980b57cec5SDimitry Andric     thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
15990b57cec5SDimitry Andric 
16000b57cec5SDimitry Andric   // If the team size has increased, we still communicate with old leaves via
16010b57cec5SDimitry Andric   // oncore barrier.
16020b57cec5SDimitry Andric   unsigned short int old_leaf_kids = thr_bar->leaf_kids;
16030b57cec5SDimitry Andric   kmp_uint64 old_leaf_state = thr_bar->leaf_state;
16040b57cec5SDimitry Andric   team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
16050b57cec5SDimitry Andric                                                        tid, team);
16060b57cec5SDimitry Andric   // But if the entire team changes, we won't use oncore barrier at all
16070b57cec5SDimitry Andric   if (team_change)
16080b57cec5SDimitry Andric     old_leaf_kids = 0;
16090b57cec5SDimitry Andric 
16100b57cec5SDimitry Andric #if KMP_BARRIER_ICV_PUSH
16110b57cec5SDimitry Andric   if (propagate_icvs) {
16120b57cec5SDimitry Andric     __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
16130b57cec5SDimitry Andric                              FALSE);
16140b57cec5SDimitry Andric     if (KMP_MASTER_TID(
1615fe6060f1SDimitry Andric             tid)) { // primary already has copy in final destination; copy
16160b57cec5SDimitry Andric       copy_icvs(&thr_bar->th_fixed_icvs,
16170b57cec5SDimitry Andric                 &team->t.t_implicit_task_taskdata[tid].td_icvs);
16180b57cec5SDimitry Andric     } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
16190b57cec5SDimitry Andric                thr_bar->use_oncore_barrier) { // optimization for inf blocktime
16200b57cec5SDimitry Andric       if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
16210b57cec5SDimitry Andric         // leaves (on-core children) pull parent's fixed ICVs directly to local
16220b57cec5SDimitry Andric         // ICV store
16230b57cec5SDimitry Andric         copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
16240b57cec5SDimitry Andric                   &thr_bar->parent_bar->th_fixed_icvs);
16250b57cec5SDimitry Andric       // non-leaves will get ICVs piggybacked with b_go via NGO store
16260b57cec5SDimitry Andric     } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
16270b57cec5SDimitry Andric       if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
16280b57cec5SDimitry Andric         // access
16290b57cec5SDimitry Andric         copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
16300b57cec5SDimitry Andric       else // leaves copy parent's fixed ICVs directly to local ICV store
16310b57cec5SDimitry Andric         copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
16320b57cec5SDimitry Andric                   &thr_bar->parent_bar->th_fixed_icvs);
16330b57cec5SDimitry Andric     }
16340b57cec5SDimitry Andric   }
16350b57cec5SDimitry Andric #endif // KMP_BARRIER_ICV_PUSH
16360b57cec5SDimitry Andric 
16370b57cec5SDimitry Andric   // Now, release my children
16380b57cec5SDimitry Andric   if (thr_bar->my_level) { // not a leaf
16390b57cec5SDimitry Andric     kmp_int32 child_tid;
16400b57cec5SDimitry Andric     kmp_uint32 last;
16410b57cec5SDimitry Andric     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
16420b57cec5SDimitry Andric         thr_bar->use_oncore_barrier) {
16430b57cec5SDimitry Andric       if (KMP_MASTER_TID(tid)) { // do a flat release
16440b57cec5SDimitry Andric         // Set local b_go to bump children via NGO store of the cache line
16450b57cec5SDimitry Andric         // containing IVCs and b_go.
16460b57cec5SDimitry Andric         thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
16470b57cec5SDimitry Andric         // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
16480b57cec5SDimitry Andric         // the cache line
16490b57cec5SDimitry Andric         ngo_load(&thr_bar->th_fixed_icvs);
16500b57cec5SDimitry Andric         // This loops over all the threads skipping only the leaf nodes in the
16510b57cec5SDimitry Andric         // hierarchy
16520b57cec5SDimitry Andric         for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
16530b57cec5SDimitry Andric              child_tid += thr_bar->skip_per_level[1]) {
16540b57cec5SDimitry Andric           kmp_bstate_t *child_bar =
16550b57cec5SDimitry Andric               &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
16560b57cec5SDimitry Andric           KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
16570b57cec5SDimitry Andric                         "releasing T#%d(%d:%d)"
16580b57cec5SDimitry Andric                         " go(%p): %u => %u\n",
16590b57cec5SDimitry Andric                         gtid, team->t.t_id, tid,
16600b57cec5SDimitry Andric                         __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
16610b57cec5SDimitry Andric                         child_tid, &child_bar->b_go, child_bar->b_go,
16620b57cec5SDimitry Andric                         child_bar->b_go + KMP_BARRIER_STATE_BUMP));
16630b57cec5SDimitry Andric           // Use ngo store (if available) to both store ICVs and release child
16640b57cec5SDimitry Andric           // via child's b_go
16650b57cec5SDimitry Andric           ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
16660b57cec5SDimitry Andric         }
16670b57cec5SDimitry Andric         ngo_sync();
16680b57cec5SDimitry Andric       }
16690b57cec5SDimitry Andric       TCW_8(thr_bar->b_go,
16700b57cec5SDimitry Andric             KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
16710b57cec5SDimitry Andric       // Now, release leaf children
16720b57cec5SDimitry Andric       if (thr_bar->leaf_kids) { // if there are any
16730b57cec5SDimitry Andric         // We test team_change on the off-chance that the level 1 team changed.
16740b57cec5SDimitry Andric         if (team_change ||
16750b57cec5SDimitry Andric             old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
16760b57cec5SDimitry Andric           if (old_leaf_kids) { // release old leaf kids
16770b57cec5SDimitry Andric             thr_bar->b_go |= old_leaf_state;
16780b57cec5SDimitry Andric           }
16790b57cec5SDimitry Andric           // Release new leaf kids
16800b57cec5SDimitry Andric           last = tid + thr_bar->skip_per_level[1];
16810b57cec5SDimitry Andric           if (last > nproc)
16820b57cec5SDimitry Andric             last = nproc;
16830b57cec5SDimitry Andric           for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
16840b57cec5SDimitry Andric                ++child_tid) { // skip_per_level[0]=1
16850b57cec5SDimitry Andric             kmp_info_t *child_thr = team->t.t_threads[child_tid];
16860b57cec5SDimitry Andric             kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
16870b57cec5SDimitry Andric             KA_TRACE(
16880b57cec5SDimitry Andric                 20,
16890b57cec5SDimitry Andric                 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
16900b57cec5SDimitry Andric                  " T#%d(%d:%d) go(%p): %u => %u\n",
16910b57cec5SDimitry Andric                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
16920b57cec5SDimitry Andric                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
16930b57cec5SDimitry Andric                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
16940b57cec5SDimitry Andric             // Release child using child's b_go flag
1695e8d8bef9SDimitry Andric             kmp_flag_64<> flag(&child_bar->b_go, child_thr);
16960b57cec5SDimitry Andric             flag.release();
16970b57cec5SDimitry Andric           }
16980b57cec5SDimitry Andric         } else { // Release all children at once with leaf_state bits on my own
16990b57cec5SDimitry Andric           // b_go flag
17000b57cec5SDimitry Andric           thr_bar->b_go |= thr_bar->leaf_state;
17010b57cec5SDimitry Andric         }
17020b57cec5SDimitry Andric       }
17030b57cec5SDimitry Andric     } else { // Blocktime is not infinite; do a simple hierarchical release
17040b57cec5SDimitry Andric       for (int d = thr_bar->my_level - 1; d >= 0;
17050b57cec5SDimitry Andric            --d) { // Release highest level threads first
17060b57cec5SDimitry Andric         last = tid + thr_bar->skip_per_level[d + 1];
17070b57cec5SDimitry Andric         kmp_uint32 skip = thr_bar->skip_per_level[d];
17080b57cec5SDimitry Andric         if (last > nproc)
17090b57cec5SDimitry Andric           last = nproc;
17100b57cec5SDimitry Andric         for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
17110b57cec5SDimitry Andric           kmp_info_t *child_thr = team->t.t_threads[child_tid];
17120b57cec5SDimitry Andric           kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
17130b57cec5SDimitry Andric           KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
17140b57cec5SDimitry Andric                         "releasing T#%d(%d:%d) go(%p): %u => %u\n",
17150b57cec5SDimitry Andric                         gtid, team->t.t_id, tid,
17160b57cec5SDimitry Andric                         __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
17170b57cec5SDimitry Andric                         child_tid, &child_bar->b_go, child_bar->b_go,
17180b57cec5SDimitry Andric                         child_bar->b_go + KMP_BARRIER_STATE_BUMP));
17190b57cec5SDimitry Andric           // Release child using child's b_go flag
1720e8d8bef9SDimitry Andric           kmp_flag_64<> flag(&child_bar->b_go, child_thr);
17210b57cec5SDimitry Andric           flag.release();
17220b57cec5SDimitry Andric         }
17230b57cec5SDimitry Andric       }
17240b57cec5SDimitry Andric     }
17250b57cec5SDimitry Andric #if KMP_BARRIER_ICV_PUSH
17260b57cec5SDimitry Andric     if (propagate_icvs && !KMP_MASTER_TID(tid))
17270b57cec5SDimitry Andric       // non-leaves copy ICVs from fixed ICVs to local dest
17280b57cec5SDimitry Andric       copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
17290b57cec5SDimitry Andric                 &thr_bar->th_fixed_icvs);
17300b57cec5SDimitry Andric #endif // KMP_BARRIER_ICV_PUSH
17310b57cec5SDimitry Andric   }
17320b57cec5SDimitry Andric   KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
17330b57cec5SDimitry Andric                 "barrier type %d\n",
17340b57cec5SDimitry Andric                 gtid, team->t.t_id, tid, bt));
17350b57cec5SDimitry Andric }
17360b57cec5SDimitry Andric 
17370b57cec5SDimitry Andric // End of Barrier Algorithms
17380b57cec5SDimitry Andric 
17390b57cec5SDimitry Andric // type traits for cancellable value
17400b57cec5SDimitry Andric // if cancellable is true, then is_cancellable is a normal boolean variable
17410b57cec5SDimitry Andric // if cancellable is false, then is_cancellable is a compile time constant
17420b57cec5SDimitry Andric template <bool cancellable> struct is_cancellable {};
17430b57cec5SDimitry Andric template <> struct is_cancellable<true> {
17440b57cec5SDimitry Andric   bool value;
17450b57cec5SDimitry Andric   is_cancellable() : value(false) {}
17460b57cec5SDimitry Andric   is_cancellable(bool b) : value(b) {}
17470b57cec5SDimitry Andric   is_cancellable &operator=(bool b) {
17480b57cec5SDimitry Andric     value = b;
17490b57cec5SDimitry Andric     return *this;
17500b57cec5SDimitry Andric   }
17510b57cec5SDimitry Andric   operator bool() const { return value; }
17520b57cec5SDimitry Andric };
17530b57cec5SDimitry Andric template <> struct is_cancellable<false> {
17540b57cec5SDimitry Andric   is_cancellable &operator=(bool b) { return *this; }
17550b57cec5SDimitry Andric   constexpr operator bool() const { return false; }
17560b57cec5SDimitry Andric };
17570b57cec5SDimitry Andric 
17580b57cec5SDimitry Andric // Internal function to do a barrier.
17590b57cec5SDimitry Andric /* If is_split is true, do a split barrier, otherwise, do a plain barrier
17600b57cec5SDimitry Andric    If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
17610b57cec5SDimitry Andric    barrier
17620b57cec5SDimitry Andric    When cancellable = false,
1763fe6060f1SDimitry Andric      Returns 0 if primary thread, 1 if worker thread.
17640b57cec5SDimitry Andric    When cancellable = true
17650b57cec5SDimitry Andric      Returns 0 if not cancelled, 1 if cancelled.  */
17660b57cec5SDimitry Andric template <bool cancellable = false>
17670b57cec5SDimitry Andric static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
17680b57cec5SDimitry Andric                                   size_t reduce_size, void *reduce_data,
17690b57cec5SDimitry Andric                                   void (*reduce)(void *, void *)) {
17700b57cec5SDimitry Andric   KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
17710b57cec5SDimitry Andric   KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
17720b57cec5SDimitry Andric   int tid = __kmp_tid_from_gtid(gtid);
17730b57cec5SDimitry Andric   kmp_info_t *this_thr = __kmp_threads[gtid];
17740b57cec5SDimitry Andric   kmp_team_t *team = this_thr->th.th_team;
17750b57cec5SDimitry Andric   int status = 0;
17760b57cec5SDimitry Andric   is_cancellable<cancellable> cancelled;
17770b57cec5SDimitry Andric #if OMPT_SUPPORT && OMPT_OPTIONAL
17780b57cec5SDimitry Andric   ompt_data_t *my_task_data;
17790b57cec5SDimitry Andric   ompt_data_t *my_parallel_data;
17800b57cec5SDimitry Andric   void *return_address;
17810b57cec5SDimitry Andric   ompt_sync_region_t barrier_kind;
17820b57cec5SDimitry Andric #endif
17830b57cec5SDimitry Andric 
17840b57cec5SDimitry Andric   KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
17850b57cec5SDimitry Andric                 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
17860b57cec5SDimitry Andric 
17870b57cec5SDimitry Andric #if OMPT_SUPPORT
17880b57cec5SDimitry Andric   if (ompt_enabled.enabled) {
17890b57cec5SDimitry Andric #if OMPT_OPTIONAL
17900b57cec5SDimitry Andric     my_task_data = OMPT_CUR_TASK_DATA(this_thr);
17910b57cec5SDimitry Andric     my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
17920b57cec5SDimitry Andric     return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
17930b57cec5SDimitry Andric     barrier_kind = __ompt_get_barrier_kind(bt, this_thr);
17940b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_sync_region) {
17950b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
17960b57cec5SDimitry Andric           barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
17970b57cec5SDimitry Andric           return_address);
17980b57cec5SDimitry Andric     }
17990b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_sync_region_wait) {
18000b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
18010b57cec5SDimitry Andric           barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
18020b57cec5SDimitry Andric           return_address);
18030b57cec5SDimitry Andric     }
18040b57cec5SDimitry Andric #endif
18050b57cec5SDimitry Andric     // It is OK to report the barrier state after the barrier begin callback.
18060b57cec5SDimitry Andric     // According to the OMPT specification, a compliant implementation may
18070b57cec5SDimitry Andric     // even delay reporting this state until the barrier begins to wait.
1808*0fca6ea1SDimitry Andric     auto *ompt_thr_info = &this_thr->th.ompt_thread_info;
1809*0fca6ea1SDimitry Andric     switch (barrier_kind) {
1810*0fca6ea1SDimitry Andric     case ompt_sync_region_barrier_explicit:
1811*0fca6ea1SDimitry Andric       ompt_thr_info->state = ompt_state_wait_barrier_explicit;
1812*0fca6ea1SDimitry Andric       break;
1813*0fca6ea1SDimitry Andric     case ompt_sync_region_barrier_implicit_workshare:
1814*0fca6ea1SDimitry Andric       ompt_thr_info->state = ompt_state_wait_barrier_implicit_workshare;
1815*0fca6ea1SDimitry Andric       break;
1816*0fca6ea1SDimitry Andric     case ompt_sync_region_barrier_implicit_parallel:
1817*0fca6ea1SDimitry Andric       ompt_thr_info->state = ompt_state_wait_barrier_implicit_parallel;
1818*0fca6ea1SDimitry Andric       break;
1819*0fca6ea1SDimitry Andric     case ompt_sync_region_barrier_teams:
1820*0fca6ea1SDimitry Andric       ompt_thr_info->state = ompt_state_wait_barrier_teams;
1821*0fca6ea1SDimitry Andric       break;
1822*0fca6ea1SDimitry Andric     case ompt_sync_region_barrier_implementation:
1823*0fca6ea1SDimitry Andric       [[fallthrough]];
1824*0fca6ea1SDimitry Andric     default:
1825*0fca6ea1SDimitry Andric       ompt_thr_info->state = ompt_state_wait_barrier_implementation;
1826*0fca6ea1SDimitry Andric     }
18270b57cec5SDimitry Andric   }
18280b57cec5SDimitry Andric #endif
18290b57cec5SDimitry Andric 
18300b57cec5SDimitry Andric   if (!team->t.t_serialized) {
18310b57cec5SDimitry Andric #if USE_ITT_BUILD
18320b57cec5SDimitry Andric     // This value will be used in itt notify events below.
18330b57cec5SDimitry Andric     void *itt_sync_obj = NULL;
18340b57cec5SDimitry Andric #if USE_ITT_NOTIFY
18350b57cec5SDimitry Andric     if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
18360b57cec5SDimitry Andric       itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
18370b57cec5SDimitry Andric #endif
18380b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
18390b57cec5SDimitry Andric     if (__kmp_tasking_mode == tskm_extra_barrier) {
18400b57cec5SDimitry Andric       __kmp_tasking_barrier(team, this_thr, gtid);
18410b57cec5SDimitry Andric       KA_TRACE(15,
18420b57cec5SDimitry Andric                ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
18430b57cec5SDimitry Andric                 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
18440b57cec5SDimitry Andric     }
18450b57cec5SDimitry Andric 
18460b57cec5SDimitry Andric     /* Copy the blocktime info to the thread, where __kmp_wait_template() can
18470b57cec5SDimitry Andric        access it when the team struct is not guaranteed to exist. */
18480b57cec5SDimitry Andric     // See note about the corresponding code in __kmp_join_barrier() being
18490b57cec5SDimitry Andric     // performance-critical.
18500b57cec5SDimitry Andric     if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
18510b57cec5SDimitry Andric #if KMP_USE_MONITOR
18520b57cec5SDimitry Andric       this_thr->th.th_team_bt_intervals =
18530b57cec5SDimitry Andric           team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
18540b57cec5SDimitry Andric       this_thr->th.th_team_bt_set =
18550b57cec5SDimitry Andric           team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
18560b57cec5SDimitry Andric #else
18570b57cec5SDimitry Andric       this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
18580b57cec5SDimitry Andric #endif
18590b57cec5SDimitry Andric     }
18600b57cec5SDimitry Andric 
18610b57cec5SDimitry Andric #if USE_ITT_BUILD
18620b57cec5SDimitry Andric     if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
18630b57cec5SDimitry Andric       __kmp_itt_barrier_starting(gtid, itt_sync_obj);
18640b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
18650b57cec5SDimitry Andric #if USE_DEBUGGER
18660b57cec5SDimitry Andric     // Let the debugger know: the thread arrived to the barrier and waiting.
1867fe6060f1SDimitry Andric     if (KMP_MASTER_TID(tid)) { // Primary thread counter stored in team struct
18680b57cec5SDimitry Andric       team->t.t_bar[bt].b_master_arrived += 1;
18690b57cec5SDimitry Andric     } else {
18700b57cec5SDimitry Andric       this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
18710b57cec5SDimitry Andric     } // if
18720b57cec5SDimitry Andric #endif /* USE_DEBUGGER */
18730b57cec5SDimitry Andric     if (reduce != NULL) {
18740b57cec5SDimitry Andric       // KMP_DEBUG_ASSERT( is_split == TRUE );  // #C69956
18750b57cec5SDimitry Andric       this_thr->th.th_local.reduce_data = reduce_data;
18760b57cec5SDimitry Andric     }
18770b57cec5SDimitry Andric 
18780b57cec5SDimitry Andric     if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1879*0fca6ea1SDimitry Andric       __kmp_task_team_setup(this_thr, team);
18800b57cec5SDimitry Andric 
18810b57cec5SDimitry Andric     if (cancellable) {
18820b57cec5SDimitry Andric       cancelled = __kmp_linear_barrier_gather_cancellable(
18830b57cec5SDimitry Andric           bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
18840b57cec5SDimitry Andric     } else {
18850b57cec5SDimitry Andric       switch (__kmp_barrier_gather_pattern[bt]) {
1886349cc55cSDimitry Andric       case bp_dist_bar: {
1887349cc55cSDimitry Andric         __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
1888349cc55cSDimitry Andric                                   reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1889349cc55cSDimitry Andric         break;
1890349cc55cSDimitry Andric       }
18910b57cec5SDimitry Andric       case bp_hyper_bar: {
18920b57cec5SDimitry Andric         // don't set branch bits to 0; use linear
18930b57cec5SDimitry Andric         KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
18940b57cec5SDimitry Andric         __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
18950b57cec5SDimitry Andric                                    reduce USE_ITT_BUILD_ARG(itt_sync_obj));
18960b57cec5SDimitry Andric         break;
18970b57cec5SDimitry Andric       }
18980b57cec5SDimitry Andric       case bp_hierarchical_bar: {
18990b57cec5SDimitry Andric         __kmp_hierarchical_barrier_gather(
19000b57cec5SDimitry Andric             bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
19010b57cec5SDimitry Andric         break;
19020b57cec5SDimitry Andric       }
19030b57cec5SDimitry Andric       case bp_tree_bar: {
19040b57cec5SDimitry Andric         // don't set branch bits to 0; use linear
19050b57cec5SDimitry Andric         KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
19060b57cec5SDimitry Andric         __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
19070b57cec5SDimitry Andric                                   reduce USE_ITT_BUILD_ARG(itt_sync_obj));
19080b57cec5SDimitry Andric         break;
19090b57cec5SDimitry Andric       }
19100b57cec5SDimitry Andric       default: {
19110b57cec5SDimitry Andric         __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
19120b57cec5SDimitry Andric                                     reduce USE_ITT_BUILD_ARG(itt_sync_obj));
19130b57cec5SDimitry Andric       }
19140b57cec5SDimitry Andric       }
19150b57cec5SDimitry Andric     }
19160b57cec5SDimitry Andric 
19170b57cec5SDimitry Andric     KMP_MB();
19180b57cec5SDimitry Andric 
19190b57cec5SDimitry Andric     if (KMP_MASTER_TID(tid)) {
19200b57cec5SDimitry Andric       status = 0;
19210b57cec5SDimitry Andric       if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
19220b57cec5SDimitry Andric         __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
19230b57cec5SDimitry Andric       }
19240b57cec5SDimitry Andric #if USE_DEBUGGER
19250b57cec5SDimitry Andric       // Let the debugger know: All threads are arrived and starting leaving the
19260b57cec5SDimitry Andric       // barrier.
19270b57cec5SDimitry Andric       team->t.t_bar[bt].b_team_arrived += 1;
19280b57cec5SDimitry Andric #endif
19290b57cec5SDimitry Andric 
19300b57cec5SDimitry Andric       if (__kmp_omp_cancellation) {
19310b57cec5SDimitry Andric         kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request);
19320b57cec5SDimitry Andric         // Reset cancellation flag for worksharing constructs
19330b57cec5SDimitry Andric         if (cancel_request == cancel_loop ||
19340b57cec5SDimitry Andric             cancel_request == cancel_sections) {
19350b57cec5SDimitry Andric           KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq);
19360b57cec5SDimitry Andric         }
19370b57cec5SDimitry Andric       }
19380b57cec5SDimitry Andric #if USE_ITT_BUILD
1939fe6060f1SDimitry Andric       /* TODO: In case of split reduction barrier, primary thread may send
19400b57cec5SDimitry Andric          acquired event early, before the final summation into the shared
19410b57cec5SDimitry Andric          variable is done (final summation can be a long operation for array
19420b57cec5SDimitry Andric          reductions).  */
19430b57cec5SDimitry Andric       if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
19440b57cec5SDimitry Andric         __kmp_itt_barrier_middle(gtid, itt_sync_obj);
19450b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
19460b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
19470b57cec5SDimitry Andric       // Barrier - report frame end (only if active_level == 1)
19480b57cec5SDimitry Andric       if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
19490b57cec5SDimitry Andric           __kmp_forkjoin_frames_mode &&
1950e8d8bef9SDimitry Andric           (this_thr->th.th_teams_microtask == NULL || // either not in teams
1951e8d8bef9SDimitry Andric            this_thr->th.th_teams_size.nteams == 1) && // or inside single team
19520b57cec5SDimitry Andric           team->t.t_active_level == 1) {
19530b57cec5SDimitry Andric         ident_t *loc = __kmp_threads[gtid]->th.th_ident;
19540b57cec5SDimitry Andric         kmp_uint64 cur_time = __itt_get_timestamp();
19550b57cec5SDimitry Andric         kmp_info_t **other_threads = team->t.t_threads;
19560b57cec5SDimitry Andric         int nproc = this_thr->th.th_team_nproc;
19570b57cec5SDimitry Andric         int i;
19580b57cec5SDimitry Andric         switch (__kmp_forkjoin_frames_mode) {
19590b57cec5SDimitry Andric         case 1:
19600b57cec5SDimitry Andric           __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
19610b57cec5SDimitry Andric                                  loc, nproc);
19620b57cec5SDimitry Andric           this_thr->th.th_frame_time = cur_time;
19630b57cec5SDimitry Andric           break;
19640b57cec5SDimitry Andric         case 2: // AC 2015-01-19: currently does not work for hierarchical (to
19650b57cec5SDimitry Andric           // be fixed)
19660b57cec5SDimitry Andric           __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
19670b57cec5SDimitry Andric                                  1, loc, nproc);
19680b57cec5SDimitry Andric           break;
19690b57cec5SDimitry Andric         case 3:
19700b57cec5SDimitry Andric           if (__itt_metadata_add_ptr) {
1971fe6060f1SDimitry Andric             // Initialize with primary thread's wait time
19720b57cec5SDimitry Andric             kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
19730b57cec5SDimitry Andric             // Set arrive time to zero to be able to check it in
19740b57cec5SDimitry Andric             // __kmp_invoke_task(); the same is done inside the loop below
19750b57cec5SDimitry Andric             this_thr->th.th_bar_arrive_time = 0;
19760b57cec5SDimitry Andric             for (i = 1; i < nproc; ++i) {
19770b57cec5SDimitry Andric               delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
19780b57cec5SDimitry Andric               other_threads[i]->th.th_bar_arrive_time = 0;
19790b57cec5SDimitry Andric             }
19800b57cec5SDimitry Andric             __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
19810b57cec5SDimitry Andric                                          cur_time, delta,
19820b57cec5SDimitry Andric                                          (kmp_uint64)(reduce != NULL));
19830b57cec5SDimitry Andric           }
19840b57cec5SDimitry Andric           __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
19850b57cec5SDimitry Andric                                  loc, nproc);
19860b57cec5SDimitry Andric           this_thr->th.th_frame_time = cur_time;
19870b57cec5SDimitry Andric           break;
19880b57cec5SDimitry Andric         }
19890b57cec5SDimitry Andric       }
19900b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
19910b57cec5SDimitry Andric     } else {
19920b57cec5SDimitry Andric       status = 1;
19930b57cec5SDimitry Andric #if USE_ITT_BUILD
19940b57cec5SDimitry Andric       if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
19950b57cec5SDimitry Andric         __kmp_itt_barrier_middle(gtid, itt_sync_obj);
19960b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
19970b57cec5SDimitry Andric     }
19980b57cec5SDimitry Andric     if ((status == 1 || !is_split) && !cancelled) {
19990b57cec5SDimitry Andric       if (cancellable) {
20000b57cec5SDimitry Andric         cancelled = __kmp_linear_barrier_release_cancellable(
20010b57cec5SDimitry Andric             bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
20020b57cec5SDimitry Andric       } else {
20030b57cec5SDimitry Andric         switch (__kmp_barrier_release_pattern[bt]) {
2004349cc55cSDimitry Andric         case bp_dist_bar: {
2005349cc55cSDimitry Andric           KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
2006349cc55cSDimitry Andric           __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
2007349cc55cSDimitry Andric                                      FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
2008349cc55cSDimitry Andric           break;
2009349cc55cSDimitry Andric         }
20100b57cec5SDimitry Andric         case bp_hyper_bar: {
20110b57cec5SDimitry Andric           KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
20120b57cec5SDimitry Andric           __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
20130b57cec5SDimitry Andric                                       FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
20140b57cec5SDimitry Andric           break;
20150b57cec5SDimitry Andric         }
20160b57cec5SDimitry Andric         case bp_hierarchical_bar: {
20170b57cec5SDimitry Andric           __kmp_hierarchical_barrier_release(
20180b57cec5SDimitry Andric               bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
20190b57cec5SDimitry Andric           break;
20200b57cec5SDimitry Andric         }
20210b57cec5SDimitry Andric         case bp_tree_bar: {
20220b57cec5SDimitry Andric           KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
20230b57cec5SDimitry Andric           __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
20240b57cec5SDimitry Andric                                      FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
20250b57cec5SDimitry Andric           break;
20260b57cec5SDimitry Andric         }
20270b57cec5SDimitry Andric         default: {
20280b57cec5SDimitry Andric           __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
20290b57cec5SDimitry Andric                                        FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
20300b57cec5SDimitry Andric         }
20310b57cec5SDimitry Andric         }
20320b57cec5SDimitry Andric       }
20330b57cec5SDimitry Andric       if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
20340b57cec5SDimitry Andric         __kmp_task_team_sync(this_thr, team);
20350b57cec5SDimitry Andric       }
20360b57cec5SDimitry Andric     }
20370b57cec5SDimitry Andric 
20380b57cec5SDimitry Andric #if USE_ITT_BUILD
20390b57cec5SDimitry Andric     /* GEH: TODO: Move this under if-condition above and also include in
20400b57cec5SDimitry Andric        __kmp_end_split_barrier(). This will more accurately represent the actual
20410b57cec5SDimitry Andric        release time of the threads for split barriers.  */
20420b57cec5SDimitry Andric     if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
20430b57cec5SDimitry Andric       __kmp_itt_barrier_finished(gtid, itt_sync_obj);
20440b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
20450b57cec5SDimitry Andric   } else { // Team is serialized.
20460b57cec5SDimitry Andric     status = 0;
20470b57cec5SDimitry Andric     if (__kmp_tasking_mode != tskm_immediate_exec) {
20480b57cec5SDimitry Andric       if (this_thr->th.th_task_team != NULL) {
20490b57cec5SDimitry Andric #if USE_ITT_NOTIFY
20500b57cec5SDimitry Andric         void *itt_sync_obj = NULL;
20510b57cec5SDimitry Andric         if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
20520b57cec5SDimitry Andric           itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
20530b57cec5SDimitry Andric           __kmp_itt_barrier_starting(gtid, itt_sync_obj);
20540b57cec5SDimitry Andric         }
20550b57cec5SDimitry Andric #endif
20560b57cec5SDimitry Andric 
205704eeddc0SDimitry Andric         KMP_DEBUG_ASSERT(
205804eeddc0SDimitry Andric             this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||
205904eeddc0SDimitry Andric             this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
20600b57cec5SDimitry Andric                 TRUE);
20610b57cec5SDimitry Andric         __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
2062*0fca6ea1SDimitry Andric         __kmp_task_team_setup(this_thr, team);
20630b57cec5SDimitry Andric 
20640b57cec5SDimitry Andric #if USE_ITT_BUILD
20650b57cec5SDimitry Andric         if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
20660b57cec5SDimitry Andric           __kmp_itt_barrier_finished(gtid, itt_sync_obj);
20670b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
20680b57cec5SDimitry Andric       }
20690b57cec5SDimitry Andric     }
20700b57cec5SDimitry Andric   }
20710b57cec5SDimitry Andric   KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
20720b57cec5SDimitry Andric                 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
20730b57cec5SDimitry Andric                 __kmp_tid_from_gtid(gtid), status));
20740b57cec5SDimitry Andric 
20750b57cec5SDimitry Andric #if OMPT_SUPPORT
20760b57cec5SDimitry Andric   if (ompt_enabled.enabled) {
20770b57cec5SDimitry Andric #if OMPT_OPTIONAL
20780b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_sync_region_wait) {
20790b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
20800b57cec5SDimitry Andric           barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
20810b57cec5SDimitry Andric           return_address);
20820b57cec5SDimitry Andric     }
20830b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_sync_region) {
20840b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
20850b57cec5SDimitry Andric           barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
20860b57cec5SDimitry Andric           return_address);
20870b57cec5SDimitry Andric     }
20880b57cec5SDimitry Andric #endif
20890b57cec5SDimitry Andric     this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
20900b57cec5SDimitry Andric   }
20910b57cec5SDimitry Andric #endif
20920b57cec5SDimitry Andric 
20930b57cec5SDimitry Andric   if (cancellable)
20940b57cec5SDimitry Andric     return (int)cancelled;
20950b57cec5SDimitry Andric   return status;
20960b57cec5SDimitry Andric }
20970b57cec5SDimitry Andric 
// Public entry point for a (possibly split) barrier of type bt, optionally
// combined with a reduction (reduce_size/reduce_data/reduce). Thin
// non-cancellable wrapper over __kmp_barrier_template<>.
// Returns 0 if primary thread, 1 if worker thread.
int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
                  size_t reduce_size, void *reduce_data,
                  void (*reduce)(void *, void *)) {
  return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data,
                                  reduce);
}
21050b57cec5SDimitry Andric 
21060b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
21070b57cec5SDimitry Andric // Returns 1 if cancelled, 0 otherwise
21080b57cec5SDimitry Andric int __kmp_barrier_gomp_cancel(int gtid) {
21090b57cec5SDimitry Andric   if (__kmp_omp_cancellation) {
21100b57cec5SDimitry Andric     int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE,
21110b57cec5SDimitry Andric                                                  0, NULL, NULL);
21120b57cec5SDimitry Andric     if (cancelled) {
21130b57cec5SDimitry Andric       int tid = __kmp_tid_from_gtid(gtid);
21140b57cec5SDimitry Andric       kmp_info_t *this_thr = __kmp_threads[gtid];
21150b57cec5SDimitry Andric       if (KMP_MASTER_TID(tid)) {
2116fe6060f1SDimitry Andric         // Primary thread does not need to revert anything
21170b57cec5SDimitry Andric       } else {
21180b57cec5SDimitry Andric         // Workers need to revert their private b_arrived flag
21190b57cec5SDimitry Andric         this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
21200b57cec5SDimitry Andric             KMP_BARRIER_STATE_BUMP;
21210b57cec5SDimitry Andric       }
21220b57cec5SDimitry Andric     }
21230b57cec5SDimitry Andric     return cancelled;
21240b57cec5SDimitry Andric   }
21250b57cec5SDimitry Andric   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
21260b57cec5SDimitry Andric   return FALSE;
21270b57cec5SDimitry Andric }
21280b57cec5SDimitry Andric #endif
21290b57cec5SDimitry Andric 
21300b57cec5SDimitry Andric void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
21310b57cec5SDimitry Andric   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
21320b57cec5SDimitry Andric   KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
2133fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(bt < bs_last_barrier);
21340b57cec5SDimitry Andric   int tid = __kmp_tid_from_gtid(gtid);
21350b57cec5SDimitry Andric   kmp_info_t *this_thr = __kmp_threads[gtid];
21360b57cec5SDimitry Andric   kmp_team_t *team = this_thr->th.th_team;
21370b57cec5SDimitry Andric 
21380b57cec5SDimitry Andric   if (!team->t.t_serialized) {
21390b57cec5SDimitry Andric     if (KMP_MASTER_GTID(gtid)) {
21400b57cec5SDimitry Andric       switch (__kmp_barrier_release_pattern[bt]) {
2141349cc55cSDimitry Andric       case bp_dist_bar: {
2142349cc55cSDimitry Andric         __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
2143349cc55cSDimitry Andric                                    FALSE USE_ITT_BUILD_ARG(NULL));
2144349cc55cSDimitry Andric         break;
2145349cc55cSDimitry Andric       }
21460b57cec5SDimitry Andric       case bp_hyper_bar: {
21470b57cec5SDimitry Andric         KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
21480b57cec5SDimitry Andric         __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
21490b57cec5SDimitry Andric                                     FALSE USE_ITT_BUILD_ARG(NULL));
21500b57cec5SDimitry Andric         break;
21510b57cec5SDimitry Andric       }
21520b57cec5SDimitry Andric       case bp_hierarchical_bar: {
21530b57cec5SDimitry Andric         __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
21540b57cec5SDimitry Andric                                            FALSE USE_ITT_BUILD_ARG(NULL));
21550b57cec5SDimitry Andric         break;
21560b57cec5SDimitry Andric       }
21570b57cec5SDimitry Andric       case bp_tree_bar: {
21580b57cec5SDimitry Andric         KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
21590b57cec5SDimitry Andric         __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
21600b57cec5SDimitry Andric                                    FALSE USE_ITT_BUILD_ARG(NULL));
21610b57cec5SDimitry Andric         break;
21620b57cec5SDimitry Andric       }
21630b57cec5SDimitry Andric       default: {
21640b57cec5SDimitry Andric         __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
21650b57cec5SDimitry Andric                                      FALSE USE_ITT_BUILD_ARG(NULL));
21660b57cec5SDimitry Andric       }
21670b57cec5SDimitry Andric       }
21680b57cec5SDimitry Andric       if (__kmp_tasking_mode != tskm_immediate_exec) {
21690b57cec5SDimitry Andric         __kmp_task_team_sync(this_thr, team);
21700b57cec5SDimitry Andric       } // if
21710b57cec5SDimitry Andric     }
21720b57cec5SDimitry Andric   }
21730b57cec5SDimitry Andric }
21740b57cec5SDimitry Andric 
21750b57cec5SDimitry Andric void __kmp_join_barrier(int gtid) {
21760b57cec5SDimitry Andric   KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
21770b57cec5SDimitry Andric   KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
2178fe6060f1SDimitry Andric 
2179fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
2180fe6060f1SDimitry Andric 
21810b57cec5SDimitry Andric   kmp_info_t *this_thr = __kmp_threads[gtid];
21820b57cec5SDimitry Andric   kmp_team_t *team;
21830b57cec5SDimitry Andric   int tid;
21840b57cec5SDimitry Andric #ifdef KMP_DEBUG
21850b57cec5SDimitry Andric   int team_id;
21860b57cec5SDimitry Andric #endif /* KMP_DEBUG */
21870b57cec5SDimitry Andric #if USE_ITT_BUILD
21880b57cec5SDimitry Andric   void *itt_sync_obj = NULL;
21890b57cec5SDimitry Andric #if USE_ITT_NOTIFY
21900b57cec5SDimitry Andric   if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
21910b57cec5SDimitry Andric     // Get object created at fork_barrier
21920b57cec5SDimitry Andric     itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
21930b57cec5SDimitry Andric #endif
21940b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
219581ad6265SDimitry Andric #if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
219681ad6265SDimitry Andric   int nproc = this_thr->th.th_team_nproc;
219781ad6265SDimitry Andric #endif
21980b57cec5SDimitry Andric   KMP_MB();
21990b57cec5SDimitry Andric 
22000b57cec5SDimitry Andric   // Get current info
22010b57cec5SDimitry Andric   team = this_thr->th.th_team;
220281ad6265SDimitry Andric   KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
22030b57cec5SDimitry Andric   tid = __kmp_tid_from_gtid(gtid);
22040b57cec5SDimitry Andric #ifdef KMP_DEBUG
22050b57cec5SDimitry Andric   team_id = team->t.t_id;
2206349cc55cSDimitry Andric   kmp_info_t *master_thread = this_thr->th.th_team_master;
22070b57cec5SDimitry Andric   if (master_thread != team->t.t_threads[0]) {
22080b57cec5SDimitry Andric     __kmp_print_structure();
22090b57cec5SDimitry Andric   }
22100b57cec5SDimitry Andric #endif /* KMP_DEBUG */
22110b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
22120b57cec5SDimitry Andric   KMP_MB();
22130b57cec5SDimitry Andric 
22140b57cec5SDimitry Andric   // Verify state
22150b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
22160b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
22170b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
22180b57cec5SDimitry Andric   KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
22190b57cec5SDimitry Andric                 gtid, team_id, tid));
22200b57cec5SDimitry Andric 
22210b57cec5SDimitry Andric #if OMPT_SUPPORT
22220b57cec5SDimitry Andric   if (ompt_enabled.enabled) {
22230b57cec5SDimitry Andric #if OMPT_OPTIONAL
22240b57cec5SDimitry Andric     ompt_data_t *my_task_data;
22250b57cec5SDimitry Andric     ompt_data_t *my_parallel_data;
22260b57cec5SDimitry Andric     void *codeptr = NULL;
22270b57cec5SDimitry Andric     int ds_tid = this_thr->th.th_info.ds.ds_tid;
22280b57cec5SDimitry Andric     if (KMP_MASTER_TID(ds_tid) &&
22290b57cec5SDimitry Andric         (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
22300b57cec5SDimitry Andric          ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
22310b57cec5SDimitry Andric       codeptr = team->t.ompt_team_info.master_return_address;
22320b57cec5SDimitry Andric     my_task_data = OMPT_CUR_TASK_DATA(this_thr);
22330b57cec5SDimitry Andric     my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
2234*0fca6ea1SDimitry Andric     ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
2235*0fca6ea1SDimitry Andric     ompt_state_t ompt_state = ompt_state_wait_barrier_implicit_parallel;
2236*0fca6ea1SDimitry Andric     if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) {
2237*0fca6ea1SDimitry Andric       sync_kind = ompt_sync_region_barrier_teams;
2238*0fca6ea1SDimitry Andric       ompt_state = ompt_state_wait_barrier_teams;
2239*0fca6ea1SDimitry Andric     }
22400b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_sync_region) {
22410b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2242*0fca6ea1SDimitry Andric           sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
22430b57cec5SDimitry Andric     }
22440b57cec5SDimitry Andric     if (ompt_enabled.ompt_callback_sync_region_wait) {
22450b57cec5SDimitry Andric       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2246*0fca6ea1SDimitry Andric           sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
22470b57cec5SDimitry Andric     }
22480b57cec5SDimitry Andric     if (!KMP_MASTER_TID(ds_tid))
22490b57cec5SDimitry Andric       this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
22500b57cec5SDimitry Andric #endif
2251*0fca6ea1SDimitry Andric     this_thr->th.ompt_thread_info.state = ompt_state;
22520b57cec5SDimitry Andric   }
22530b57cec5SDimitry Andric #endif
22540b57cec5SDimitry Andric 
22550b57cec5SDimitry Andric   if (__kmp_tasking_mode == tskm_extra_barrier) {
22560b57cec5SDimitry Andric     __kmp_tasking_barrier(team, this_thr, gtid);
2257349cc55cSDimitry Andric     KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
2258349cc55cSDimitry Andric                   gtid, team_id, tid));
22590b57cec5SDimitry Andric   }
22600b57cec5SDimitry Andric #ifdef KMP_DEBUG
22610b57cec5SDimitry Andric   if (__kmp_tasking_mode != tskm_immediate_exec) {
22620b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
22630b57cec5SDimitry Andric                   "%p, th_task_team = %p\n",
22640b57cec5SDimitry Andric                   __kmp_gtid_from_thread(this_thr), team_id,
22650b57cec5SDimitry Andric                   team->t.t_task_team[this_thr->th.th_task_state],
22660b57cec5SDimitry Andric                   this_thr->th.th_task_team));
2267*0fca6ea1SDimitry Andric     KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, this_thr);
22680b57cec5SDimitry Andric   }
22690b57cec5SDimitry Andric #endif /* KMP_DEBUG */
22700b57cec5SDimitry Andric 
22710b57cec5SDimitry Andric   /* Copy the blocktime info to the thread, where __kmp_wait_template() can
22720b57cec5SDimitry Andric      access it when the team struct is not guaranteed to exist. Doing these
22730b57cec5SDimitry Andric      loads causes a cache miss slows down EPCC parallel by 2x. As a workaround,
22740b57cec5SDimitry Andric      we do not perform the copy if blocktime=infinite, since the values are not
22750b57cec5SDimitry Andric      used by __kmp_wait_template() in that case. */
22760b57cec5SDimitry Andric   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
22770b57cec5SDimitry Andric #if KMP_USE_MONITOR
22780b57cec5SDimitry Andric     this_thr->th.th_team_bt_intervals =
22790b57cec5SDimitry Andric         team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
22800b57cec5SDimitry Andric     this_thr->th.th_team_bt_set =
22810b57cec5SDimitry Andric         team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
22820b57cec5SDimitry Andric #else
22830b57cec5SDimitry Andric     this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
22840b57cec5SDimitry Andric #endif
22850b57cec5SDimitry Andric   }
22860b57cec5SDimitry Andric 
22870b57cec5SDimitry Andric #if USE_ITT_BUILD
22880b57cec5SDimitry Andric   if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
22890b57cec5SDimitry Andric     __kmp_itt_barrier_starting(gtid, itt_sync_obj);
22900b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
22910b57cec5SDimitry Andric 
22920b57cec5SDimitry Andric   switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
2293349cc55cSDimitry Andric   case bp_dist_bar: {
2294349cc55cSDimitry Andric     __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2295349cc55cSDimitry Andric                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
2296349cc55cSDimitry Andric     break;
2297349cc55cSDimitry Andric   }
22980b57cec5SDimitry Andric   case bp_hyper_bar: {
22990b57cec5SDimitry Andric     KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
23000b57cec5SDimitry Andric     __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
23010b57cec5SDimitry Andric                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
23020b57cec5SDimitry Andric     break;
23030b57cec5SDimitry Andric   }
23040b57cec5SDimitry Andric   case bp_hierarchical_bar: {
23050b57cec5SDimitry Andric     __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
23060b57cec5SDimitry Andric                                       NULL USE_ITT_BUILD_ARG(itt_sync_obj));
23070b57cec5SDimitry Andric     break;
23080b57cec5SDimitry Andric   }
23090b57cec5SDimitry Andric   case bp_tree_bar: {
23100b57cec5SDimitry Andric     KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
23110b57cec5SDimitry Andric     __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
23120b57cec5SDimitry Andric                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
23130b57cec5SDimitry Andric     break;
23140b57cec5SDimitry Andric   }
23150b57cec5SDimitry Andric   default: {
23160b57cec5SDimitry Andric     __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
23170b57cec5SDimitry Andric                                 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
23180b57cec5SDimitry Andric   }
23190b57cec5SDimitry Andric   }
23200b57cec5SDimitry Andric 
23210b57cec5SDimitry Andric   /* From this point on, the team data structure may be deallocated at any time
2322fe6060f1SDimitry Andric      by the primary thread - it is unsafe to reference it in any of the worker
23230b57cec5SDimitry Andric      threads. Any per-team data items that need to be referenced before the
23240b57cec5SDimitry Andric      end of the barrier should be moved to the kmp_task_team_t structs.  */
23250b57cec5SDimitry Andric   if (KMP_MASTER_TID(tid)) {
23260b57cec5SDimitry Andric     if (__kmp_tasking_mode != tskm_immediate_exec) {
23270b57cec5SDimitry Andric       __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
23280b57cec5SDimitry Andric     }
23290b57cec5SDimitry Andric     if (__kmp_display_affinity) {
23300b57cec5SDimitry Andric       KMP_CHECK_UPDATE(team->t.t_display_affinity, 0);
23310b57cec5SDimitry Andric     }
23320b57cec5SDimitry Andric #if KMP_STATS_ENABLED
2333fe6060f1SDimitry Andric     // Have primary thread flag the workers to indicate they are now waiting for
23340b57cec5SDimitry Andric     // next parallel region, Also wake them up so they switch their timers to
23350b57cec5SDimitry Andric     // idle.
23360b57cec5SDimitry Andric     for (int i = 0; i < team->t.t_nproc; ++i) {
23370b57cec5SDimitry Andric       kmp_info_t *team_thread = team->t.t_threads[i];
23380b57cec5SDimitry Andric       if (team_thread == this_thr)
23390b57cec5SDimitry Andric         continue;
23400b57cec5SDimitry Andric       team_thread->th.th_stats->setIdleFlag();
23410b57cec5SDimitry Andric       if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
23420b57cec5SDimitry Andric           team_thread->th.th_sleep_loc != NULL)
2343349cc55cSDimitry Andric         __kmp_null_resume_wrapper(team_thread);
23440b57cec5SDimitry Andric     }
23450b57cec5SDimitry Andric #endif
23460b57cec5SDimitry Andric #if USE_ITT_BUILD
23470b57cec5SDimitry Andric     if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
23480b57cec5SDimitry Andric       __kmp_itt_barrier_middle(gtid, itt_sync_obj);
23490b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
23500b57cec5SDimitry Andric 
23510b57cec5SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY
23520b57cec5SDimitry Andric     // Join barrier - report frame end
23530b57cec5SDimitry Andric     if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2354e8d8bef9SDimitry Andric         __kmp_forkjoin_frames_mode &&
2355e8d8bef9SDimitry Andric         (this_thr->th.th_teams_microtask == NULL || // either not in teams
2356e8d8bef9SDimitry Andric          this_thr->th.th_teams_size.nteams == 1) && // or inside single team
23570b57cec5SDimitry Andric         team->t.t_active_level == 1) {
23580b57cec5SDimitry Andric       kmp_uint64 cur_time = __itt_get_timestamp();
23590b57cec5SDimitry Andric       ident_t *loc = team->t.t_ident;
23600b57cec5SDimitry Andric       kmp_info_t **other_threads = team->t.t_threads;
23610b57cec5SDimitry Andric       switch (__kmp_forkjoin_frames_mode) {
23620b57cec5SDimitry Andric       case 1:
23630b57cec5SDimitry Andric         __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
23640b57cec5SDimitry Andric                                loc, nproc);
23650b57cec5SDimitry Andric         break;
23660b57cec5SDimitry Andric       case 2:
23670b57cec5SDimitry Andric         __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
23680b57cec5SDimitry Andric                                loc, nproc);
23690b57cec5SDimitry Andric         break;
23700b57cec5SDimitry Andric       case 3:
23710b57cec5SDimitry Andric         if (__itt_metadata_add_ptr) {
2372fe6060f1SDimitry Andric           // Initialize with primary thread's wait time
23730b57cec5SDimitry Andric           kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
23740b57cec5SDimitry Andric           // Set arrive time to zero to be able to check it in
23750b57cec5SDimitry Andric           // __kmp_invoke_task(); the same is done inside the loop below
23760b57cec5SDimitry Andric           this_thr->th.th_bar_arrive_time = 0;
237781ad6265SDimitry Andric           for (int i = 1; i < nproc; ++i) {
23780b57cec5SDimitry Andric             delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
23790b57cec5SDimitry Andric             other_threads[i]->th.th_bar_arrive_time = 0;
23800b57cec5SDimitry Andric           }
23810b57cec5SDimitry Andric           __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
23820b57cec5SDimitry Andric                                        cur_time, delta, 0);
23830b57cec5SDimitry Andric         }
23840b57cec5SDimitry Andric         __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
23850b57cec5SDimitry Andric                                loc, nproc);
23860b57cec5SDimitry Andric         this_thr->th.th_frame_time = cur_time;
23870b57cec5SDimitry Andric         break;
23880b57cec5SDimitry Andric       }
23890b57cec5SDimitry Andric     }
23900b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
23910b57cec5SDimitry Andric   }
23920b57cec5SDimitry Andric #if USE_ITT_BUILD
23930b57cec5SDimitry Andric   else {
23940b57cec5SDimitry Andric     if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
23950b57cec5SDimitry Andric       __kmp_itt_barrier_middle(gtid, itt_sync_obj);
23960b57cec5SDimitry Andric   }
23970b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
23980b57cec5SDimitry Andric 
23990b57cec5SDimitry Andric #if KMP_DEBUG
24000b57cec5SDimitry Andric   if (KMP_MASTER_TID(tid)) {
24010b57cec5SDimitry Andric     KA_TRACE(
24020b57cec5SDimitry Andric         15,
24030b57cec5SDimitry Andric         ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
24040b57cec5SDimitry Andric          gtid, team_id, tid, nproc));
24050b57cec5SDimitry Andric   }
24060b57cec5SDimitry Andric #endif /* KMP_DEBUG */
24070b57cec5SDimitry Andric 
24080b57cec5SDimitry Andric   // TODO now, mark worker threads as done so they may be disbanded
24090b57cec5SDimitry Andric   KMP_MB(); // Flush all pending memory write invalidates.
24100b57cec5SDimitry Andric   KA_TRACE(10,
24110b57cec5SDimitry Andric            ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
24120b57cec5SDimitry Andric 
24130b57cec5SDimitry Andric }
24140b57cec5SDimitry Andric 
24150b57cec5SDimitry Andric // TODO release worker threads' fork barriers as we are ready instead of all at
24160b57cec5SDimitry Andric // once
/* Fork barrier: the point where the primary thread releases the team's worker
   threads into a new parallel region.  The primary thread (tid 0) performs
   per-team setup (task-team setup, blocktime copy) and then releases the
   workers through whichever release pattern is configured for the fork/join
   barrier; workers block inside the release routine until released.
   gtid: global thread id of the calling thread.
   tid:  thread id within the team (may be stale for workers on entry; it is
         re-read from the thread struct after the release). */
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  // Only the primary thread can trust th_team at this point; workers' team
  // pointers are not valid until after the release (see comment below).
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */
#ifdef KMP_DEBUG
  if (team)
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                  (team != NULL) ? team->t.t_id : -1, tid));
#endif
  // th_team pointer only valid for primary thread here
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create itt barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
    // Debug builds: sanity-check that every worker is parked on the fork/join
    // barrier (b_go still at init state, modulo the sleep bit) and still
    // points at this team before we release anyone.
    KMP_DEBUG_ASSERT(team);
    kmp_info_t **other_threads = team->t.t_threads;
    int i;

    // Verify state
    KMP_MB();

    for (i = 1; i < team->t.t_nproc; ++i) {
      KA_TRACE(500,
               ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
                "== %u.\n",
                gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      KMP_DEBUG_ASSERT(
          (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
           ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif

    // Set up the task teams for the new region before workers are released,
    // so they observe a consistent task-team state when they wake.
    if (__kmp_tasking_mode != tskm_immediate_exec)
      __kmp_task_team_setup(this_thr, team);

    /* The primary thread may have changed its blocktime between join barrier
       and fork barrier. Copy the blocktime info to the thread, where
       __kmp_wait_template() can access it when the team struct is not
       guaranteed to exist. */
    // See note about the corresponding code in __kmp_join_barrier() being
    // performance-critical
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }
  } // primary thread

  // Release the workers (primary) / wait to be released (workers) using the
  // configured release pattern.  Workers do not return from their release
  // routine until the primary thread has let them go.
  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(NULL));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    // Linear release is the fallback for any unrecognized pattern value.
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

#if OMPT_SUPPORT
  // OMPT: a thread that was waiting on an implicit-parallel or teams barrier
  // has now left it; emit the scope_end callbacks for the sync region and,
  // for workers, the end of the implicit task.
  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled &&
      (ompt_state == ompt_state_wait_barrier_teams ||
       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    // Workers may have no current team here, so fall back to the
    // thread-local task_data when team is NULL.
    ompt_data_t *task_data = (team)
                                 ? OMPT_CUR_TASK_DATA(this_thr)
                                 : &(this_thr->th.ompt_thread_info.task_data);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
    // Report a teams barrier when this thread is part of a league, otherwise
    // an implicit parallel barrier.
    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      sync_kind = ompt_sync_region_barrier_teams;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
    }
  }
#endif

  // Early exit for reaping threads releasing forkjoin barrier
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }

  /* We can now assume that a valid team structure has been allocated by the
     primary thread and propagated to all worker threads. The current thread,
     however, may not be part of the team, so we can't blindly assume that the
     team pointer is non-null.  */
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  // Refresh tid: the thread's position in the (possibly new) team may differ
  // from the value passed in.
  tid = __kmp_tid_from_gtid(gtid);

#if KMP_BARRIER_ICV_PULL
  /* Primary thread's copy of the ICVs was set up on the implicit taskdata in
     __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
     implicit task has this data before this function is called. We cannot
     modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's
     thread struct, because it is not always the case that the threads arrays
     have been allocated when __kmp_fork_call() is executed. */
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) { // primary thread already has ICVs
      // Copy the initial ICVs from the primary thread's thread struct to the
      // implicit task for this tid.
      KA_TRACE(10,
               ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]
                     ->th.th_bar[bs_forkjoin_barrier]
                     .bb.th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PULL

  // Pick up the task team the primary thread set up before the release.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }

#if KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
    // Call dynamic affinity settings
    if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(this_thr, team->t.t_nproc);
    }
  } else if (proc_bind != proc_bind_false) {
    // Explicit proc-bind policy: rebind only if the assigned place changed.
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    } else {
      __kmp_affinity_bind_place(gtid);
    }
  }
#endif // KMP_AFFINITY_SUPPORTED
  // Perform the display affinity functionality
  if (__kmp_display_affinity) {
    if (team->t.t_display_affinity
#if KMP_AFFINITY_SUPPORTED
        || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed)
#endif
    ) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(gtid, NULL);
      this_thr->th.th_prev_num_threads = team->t.t_nproc;
      this_thr->th.th_prev_level = team->t.t_level;
    }
  }
  // Workers adopt the team's default allocator; KMP_CHECK_UPDATE avoids a
  // write when the value is already current.
  if (!KMP_MASTER_TID(tid))
    KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Get correct barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
    } // (prepare called inside barrier_release)
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
}
26460b57cec5SDimitry Andric 
/* Stage the internal control variables (ICVs) for propagation to a team's
   worker threads.  How the ICVs reach the workers depends on the compile-time
   propagation scheme:
     - KMP_BARRIER_ICV_PULL: stash one copy in the primary thread's fork/join
       barrier struct; workers pull it during __kmp_fork_barrier().
     - KMP_BARRIER_ICV_PUSH: nothing to do here; the fork barrier pushes them.
     - otherwise: copy the ICVs into every worker's implicit task right now
       (O(new_nproc) linear copy, NGO-accelerated on KMP_MIC).
   team:      team whose threads receive the ICVs (threads array must exist).
   new_nproc: number of threads in the team.
   new_icvs:  the ICV values to propagate.
   loc:       source location used when (re)initializing implicit tasks. */
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  // nproc may legitimately be 0 only before parallel init completes.
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

/* Primary thread's copy of the ICVs was set up on the implicit taskdata in
   __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
   implicit task has this data before this function is called. */
#if KMP_BARRIER_ICV_PULL
  /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which
     remains untouched), where all of the worker threads can access them and
     make their own copies after the barrier. */
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  copy_icvs(
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
      new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // The ICVs will be propagated in the fork barrier, so nothing needs to be
  // done here.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#else
  // Copy the ICVs to each of the non-primary threads.  This takes O(nthreads)
  // time.
  // NOTE: on KMP_MIC, ngo_load declares a local vector register holding
  // new_icvs that the ngo_store_icvs calls below consume, and ngo_sync
  // fences the non-globally-ordered stores — keep this statement order.
  ngo_load(new_icvs);
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread
    // TODO: GEH - pass in better source location info since usually NULL here
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}
2691