/*
 * z_Linux_util.cpp -- platform specific routines.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
#include <alloca.h>
#endif
#include <math.h> // HUGE_VAL.
#if KMP_OS_LINUX
#include <semaphore.h>
#endif // KMP_OS_LINUX
#include <sys/resource.h>
#if KMP_OS_AIX
#include <sys/ldr.h>
#include <libperfstat.h>
#else
#include <sys/syscall.h>
#endif
#include <sys/time.h>
#include <sys/times.h>
#include <unistd.h>

#if KMP_OS_LINUX
#include <sys/sysinfo.h>
#if KMP_USE_FUTEX
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions, which either require that you include
// (or break when you try to include) <pci/types.h>. Since all we need is the
// two macros below (which are part of the kernel ABI, so they can't change),
// we just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif
#elif KMP_OS_DARWIN
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <pthread_np.h>
#if KMP_OS_DRAGONFLY
#include <kvm.h>
#endif
#elif KMP_OS_NETBSD || KMP_OS_OPENBSD
#include <sys/types.h>
#include <sys/sysctl.h>
#if KMP_OS_NETBSD
#include <sched.h>
#endif
#if KMP_OS_OPENBSD
#include <pthread_np.h>
#endif
#elif KMP_OS_SOLARIS
#include <libproc.h>
#include <procfs.h>
#include <thread.h>
#include <sys/loadavg.h>
#endif

#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>

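// Note: this struct simply records a start timestamp; the elapsed-time
// helpers later in this file measure against it.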
struct kmp_sys_timer {
  struct timespec start;
};

#ifndef TIMEVAL_TO_TIMESPEC
// Convert timeval to timespec.
#define TIMEVAL_TO_TIMESPEC(tv, ts)                                            \
  do {                                                                         \
    (ts)->tv_sec = (tv)->tv_sec;                                               \
    (ts)->tv_nsec = (tv)->tv_usec * 1000;                                      \
  } while (0)
#endif

// Convert timespec to nanoseconds.
#define TS2NS(timespec)                                                        \
  (((timespec).tv_sec * (long int)1e9) + (timespec).tv_nsec)
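// For example, TS2NS applied to a timespec of {2, 500000000} yields
// 2500000000 ns, i.e. 2.5 seconds.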

static struct kmp_sys_timer __kmp_sys_timer_data;

#if KMP_HANDLE_SIGNALS
typedef void (*sig_func_t)(int);
STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
static sigset_t __kmp_sigset;
#endif

static int __kmp_init_runtime = FALSE;

static int __kmp_fork_count = 0;

static pthread_condattr_t __kmp_suspend_cond_attr;
static pthread_mutexattr_t __kmp_suspend_mutex_attr;

static kmp_cond_align_t __kmp_wait_cv;
static kmp_mutex_align_t __kmp_wait_mx;

kmp_uint64 __kmp_ticks_per_msec = 1000000;
kmp_uint64 __kmp_ticks_per_usec = 1000;

#ifdef DEBUG_SUSPEND
static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
  KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
               cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
               cond->c_cond.__c_waiting);
}
#endif

#if ((KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||   \
      KMP_OS_AIX) &&                                                           \
     KMP_AFFINITY_SUPPORTED)

/* Affinity support */

void __kmp_affinity_bind_thread(int which) {
  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
              "Illegal set affinity operation when not capable");

  kmp_affin_mask_t *mask;
  KMP_CPU_ALLOC_ON_STACK(mask);
  KMP_CPU_ZERO(mask);
  KMP_CPU_SET(which, mask);
  __kmp_set_system_affinity(mask, TRUE);
  KMP_CPU_FREE_FROM_STACK(mask);
}
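
// For example, __kmp_affinity_bind_thread(3) pins the calling thread to
// logical CPU 3 by building a one-bit mask and applying it.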

#if KMP_OS_AIX
void __kmp_affinity_determine_capable(const char *env_var) {
  // All versions of AIX support bindprocessor().

  size_t mask_size = __kmp_xproc / CHAR_BIT;
  // Round up to byte boundary.
  if (__kmp_xproc % CHAR_BIT)
    ++mask_size;

  // Round up to a multiple of sizeof(__kmp_affin_mask_size).
  if (mask_size % sizeof(__kmp_affin_mask_size))
    mask_size += sizeof(__kmp_affin_mask_size) -
                 mask_size % sizeof(__kmp_affin_mask_size);
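  // Worked example (assuming an 8-byte __kmp_affin_mask_size): with
  // __kmp_xproc == 20, mask_size starts at 2, is rounded up to 3 bytes,
  // and then to 8 bytes.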
  KMP_AFFINITY_ENABLE(mask_size);
  KA_TRACE(10,
           ("__kmp_affinity_determine_capable: "
            "AIX OS affinity interface bindprocessor functional (mask size = "
            "%" KMP_SIZE_T_SPEC ").\n",
            __kmp_affin_mask_size));
}

#else // !KMP_OS_AIX

/* Determine if we can access affinity functionality on this version of
 * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
 * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
void __kmp_affinity_determine_capable(const char *env_var) {
  // Check and see if the OS supports thread affinity.

#if KMP_OS_LINUX
#define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
#define KMP_CPU_SET_TRY_SIZE CACHE_LINE
#elif KMP_OS_FREEBSD || KMP_OS_DRAGONFLY
#define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
#elif KMP_OS_NETBSD
#define KMP_CPU_SET_SIZE_LIMIT (256)
#endif

  int verbose = __kmp_affinity.flags.verbose;
  int warnings = __kmp_affinity.flags.warnings;
  enum affinity_type type = __kmp_affinity.type;

#if KMP_OS_LINUX
  long gCode;
  unsigned char *buf;
  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);

  // If the syscall returns a suggestion for the size,
  // then we don't have to search for an appropriate size.
  gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_TRY_SIZE, buf);
  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                "initial getaffinity call returned %ld errno = %d\n",
                gCode, errno));

  if (gCode < 0 && errno != EINVAL) {
    // System call not supported
    if (verbose ||
        (warnings && (type != affinity_none) && (type != affinity_default) &&
         (type != affinity_disabled))) {
      int error = errno;
      kmp_msg_t err_code = KMP_ERR(error);
      __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
                err_code, __kmp_msg_null);
      if (__kmp_generate_warnings == kmp_warnings_off) {
        __kmp_str_free(&err_code.str);
      }
    }
    KMP_AFFINITY_DISABLE();
    KMP_INTERNAL_FREE(buf);
    return;
  } else if (gCode > 0) {
    // The optimal situation: the OS returns the size of the buffer it expects.
    KMP_AFFINITY_ENABLE(gCode);
    KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                  "affinity supported (mask size %d)\n",
                  (int)__kmp_affin_mask_size));
    KMP_INTERNAL_FREE(buf);
    return;
  }

  // Call the getaffinity system call repeatedly with increasing set sizes
  // until we succeed, or reach an upper bound on the search.
  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                "searching for proper set size\n"));
  int size;
  for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
    gCode = syscall(__NR_sched_getaffinity, 0, size, buf);
    KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                  "getaffinity for mask size %d returned %ld errno = %d\n",
                  size, gCode, errno));

    if (gCode < 0) {
      if (errno == ENOSYS) {
        // We shouldn't get here
        KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                      "inconsistent OS call behavior: errno == ENOSYS for mask "
                      "size %d\n",
                      size));
        if (verbose ||
            (warnings && (type != affinity_none) &&
             (type != affinity_default) && (type != affinity_disabled))) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
                    err_code, __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
        KMP_AFFINITY_DISABLE();
        KMP_INTERNAL_FREE(buf);
        return;
      }
      continue;
    }

    KMP_AFFINITY_ENABLE(gCode);
    KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                  "affinity supported (mask size %d)\n",
                  (int)__kmp_affin_mask_size));
    KMP_INTERNAL_FREE(buf);
    return;
  }
#elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
  long gCode;
  unsigned char *buf;
  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
  gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT,
                                 reinterpret_cast<cpuset_t *>(buf));
  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                "initial getaffinity call returned %ld errno = %d\n",
                gCode, errno));
  if (gCode == 0) {
    KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
    KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                  "affinity supported (mask size %d)\n",
                  (int)__kmp_affin_mask_size));
    KMP_INTERNAL_FREE(buf);
    return;
  }
#endif
  KMP_INTERNAL_FREE(buf);

  // Affinity is not supported
  KMP_AFFINITY_DISABLE();
  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                "cannot determine mask size - affinity not supported\n"));
  if (verbose || (warnings && (type != affinity_none) &&
                  (type != affinity_default) && (type != affinity_disabled))) {
    KMP_WARNING(AffCantGetMaskSize, env_var);
  }
}
#endif // KMP_OS_AIX
#endif // (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_DRAGONFLY || KMP_OS_AIX) && KMP_AFFINITY_SUPPORTED

#if KMP_USE_FUTEX

int __kmp_futex_determine_capable() {
  int loc = 0;
  long rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0);
  int retval = (rc == 0) || (errno != ENOSYS);

  KA_TRACE(10,
           ("__kmp_futex_determine_capable: rc = %ld errno = %d\n", rc, errno));
  KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n",
                retval ? "" : " not"));

  return retval;
}
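
// A minimal sketch (not upstream code) of the wait/wake pairing that
// FUTEX_WAIT/FUTEX_WAKE enable; the real uses live in the futex lock
// implementation in kmp_lock.cpp:
//
//   // waiter: sleep while *addr still holds expected_val
//   syscall(__NR_futex, addr, FUTEX_WAIT, expected_val, NULL, NULL, 0);
//   // waker: after changing *addr, wake one waiter
//   syscall(__NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0);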

#endif // KMP_USE_FUTEX

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_WASM) && (!KMP_ASM_INTRINS)
/* Only a 32-bit "add-exchange" instruction is available on the IA-32
   architecture, so we use compare_and_store loops for these routines. */
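
// Each routine below follows the same lock-free pattern: read the current
// value, compute the desired value, and retry the compare-and-store until no
// other thread has raced in between; the value observed before the update is
// returned.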

kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value | d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value & d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value & d;
  }
  return old_value;
}

kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
  kmp_uint32 old_value, new_value;

  old_value = TCR_4(*p);
  new_value = old_value | d;

  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_4(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
  kmp_uint32 old_value, new_value;

  old_value = TCR_4(*p);
  new_value = old_value & d;

  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_4(*p);
    new_value = old_value & d;
  }
  return old_value;
}

#if KMP_ARCH_X86 || KMP_ARCH_WASM
kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value + d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value + d;
  }
  return old_value;
}

kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
  kmp_int64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value + d;

  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value + d;
  }
  return old_value;
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_WASM */

kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
  kmp_uint64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value | d;
  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
  kmp_uint64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value & d;
  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value & d;
  }
  return old_value;
}

#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_WASM) &&                \
          (!KMP_ASM_INTRINS) */

void __kmp_terminate_thread(int gtid) {
  int status;
  kmp_info_t *th = __kmp_threads[gtid];

  if (!th)
    return;

#ifdef KMP_CANCEL_THREADS
  KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
  status = pthread_cancel(th->th.th_info.ds.ds_thread);
  if (status != 0 && status != ESRCH) {
    __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status),
                __kmp_msg_null);
  }
#endif
  KMP_YIELD(TRUE);
} // __kmp_terminate_thread

/* Set thread stack info.
   If values are unreasonable, assume call failed and use incremental stack
   refinement method instead. Returns TRUE if the stack parameters could be
   determined exactly, FALSE if incremental refinement is necessary. */
static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
  int stack_data;
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX
  int status;
  size_t size = 0;
  void *addr = 0;

  /* Always do incremental stack refinement for ubermaster threads since the
     initial thread stack range can be reduced by sibling thread creation, so
     pthread_attr_getstack may cause thread gtid aliasing. */
  if (!KMP_UBER_GTID(gtid)) {

#if KMP_OS_SOLARIS
    stack_t s;
    if ((status = thr_stksegment(&s)) < 0) {
      KMP_CHECK_SYSFAIL("thr_stksegment", status);
    }

    addr = s.ss_sp;
    size = s.ss_size;
    KA_TRACE(60, ("__kmp_set_stack_info: T#%d thr_stksegment returned size:"
                  " %lu, low addr: %p\n",
                  gtid, size, addr));
#else
    pthread_attr_t attr;
    /* Fetch the real thread attributes */
    status = pthread_attr_init(&attr);
    KMP_CHECK_SYSFAIL("pthread_attr_init", status);
#if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
    status = pthread_attr_get_np(pthread_self(), &attr);
    KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
#else
    status = pthread_getattr_np(pthread_self(), &attr);
    KMP_CHECK_SYSFAIL("pthread_getattr_np", status);
#endif
    status = pthread_attr_getstack(&attr, &addr, &size);
    KMP_CHECK_SYSFAIL("pthread_attr_getstack", status);
    KA_TRACE(60,
             ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:"
              " %lu, low addr: %p\n",
              gtid, size, addr));
    status = pthread_attr_destroy(&attr);
    KMP_CHECK_SYSFAIL("pthread_attr_destroy", status);
#endif
  }

  if (size != 0 && addr != 0) { // was stack parameter determination successful?
    /* Store the correct base and size */
    TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
    TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
    TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
    return TRUE;
  }
#endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD  \
          || KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX */
  /* Use incremental refinement starting from initial conservative estimate */
  TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
  TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
  TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
  return FALSE;
}

static void *__kmp_launch_worker(void *thr) {
  int status, old_type, old_state;
#ifdef KMP_BLOCK_SIGNALS
  sigset_t new_set, old_set;
#endif /* KMP_BLOCK_SIGNALS */
  void *exit_val;
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX
  void *volatile padding = 0;
#endif
  int gtid;

  gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid;
  __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
#if KMP_STATS_ENABLED
  // set thread local index to point to thread-specific stats
  __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
  __kmp_stats_thread_ptr->startLife();
  KMP_SET_THREAD_STATE(IDLE);
  KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
#endif

#if USE_ITT_BUILD
  __kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  __kmp_affinity_bind_init_mask(gtid);
#endif

#ifdef KMP_CANCEL_THREADS
  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Set FP control regs to be a copy of the parallel initialization thread's.
  __kmp_clear_x87_fpu_status_word();
  __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_load_mxcsr(&__kmp_init_mxcsr);
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#ifdef KMP_BLOCK_SIGNALS
  status = sigfillset(&new_set);
  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
  status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX
  if (__kmp_stkoffset > 0 && gtid > 0) {
    padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
    (void)padding;
  }
#endif

  KMP_MB();
  __kmp_set_stack_info(gtid, (kmp_info_t *)thr);

  __kmp_check_stack_overlap((kmp_info_t *)thr);

  exit_val = __kmp_launch_thread((kmp_info_t *)thr);

#ifdef KMP_BLOCK_SIGNALS
  status = pthread_sigmask(SIG_SETMASK, &old_set, NULL);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

  return exit_val;
}

#if KMP_USE_MONITOR
/* The monitor thread controls all of the threads in the complex */

static void *__kmp_launch_monitor(void *thr) {
  int status, old_type, old_state;
#ifdef KMP_BLOCK_SIGNALS
  sigset_t new_set;
#endif /* KMP_BLOCK_SIGNALS */
  struct timespec interval;

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));

  /* register us as the monitor thread */
  __kmp_gtid_set_specific(KMP_GTID_MONITOR);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_MONITOR;
#endif

  KMP_MB();

#if USE_ITT_BUILD
  // Instruct Intel(R) Threading Tools to ignore monitor thread.
  __kmp_itt_thread_ignore();
#endif /* USE_ITT_BUILD */

  __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
                       (kmp_info_t *)thr);

  __kmp_check_stack_overlap((kmp_info_t *)thr);

#ifdef KMP_CANCEL_THREADS
  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif

#if KMP_REAL_TIME_FIX
  // This is a potential fix that allows applications with a real-time
  // scheduling policy to work. However, a decision about the fix has not been
  // made yet, so it is disabled by default.
  { // Was the program started with a real-time scheduling policy?
    int sched = sched_getscheduler(0);
    if (sched == SCHED_FIFO || sched == SCHED_RR) {
      // Yes, we are part of a real-time application. Try to increase the
      // priority of the monitor.
      struct sched_param param;
      int max_priority = sched_get_priority_max(sched);
      int rc;
      KMP_WARNING(RealTimeSchedNotSupported);
      sched_getparam(0, &param);
      if (param.sched_priority < max_priority) {
        param.sched_priority += 1;
        rc = sched_setscheduler(0, sched, &param);
        if (rc != 0) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
                    err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
      } else {
        // We cannot abort here, because the number of CPUs may be enough for
        // all the threads, including the monitor thread, so the application
        // could still work...
        __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
                  KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
                  __kmp_msg_null);
      }
    }
    // AC: free the thread that waits for the monitor to start
    TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
  }
#endif // KMP_REAL_TIME_FIX

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  if (__kmp_monitor_wakeups == 1) {
    interval.tv_sec = 1;
    interval.tv_nsec = 0;
  } else {
    interval.tv_sec = 0;
    interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
  }
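  // For example, with __kmp_monitor_wakeups == 4 the monitor wakes up every
  // 250 ms (KMP_NSEC_PER_SEC / 4 nanoseconds).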

  KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));

  while (!TCR_4(__kmp_global.g.g_done)) {
    struct timespec now;
    struct timeval tval;

    /* This thread monitors the state of the system */

    KA_TRACE(15, ("__kmp_launch_monitor: update\n"));

    status = gettimeofday(&tval, NULL);
    KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
    TIMEVAL_TO_TIMESPEC(&tval, &now);

    now.tv_sec += interval.tv_sec;
    now.tv_nsec += interval.tv_nsec;

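    // Normalize the absolute deadline so tv_nsec stays below one second.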
    if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
      now.tv_sec += 1;
      now.tv_nsec -= KMP_NSEC_PER_SEC;
    }

    status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
    KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
    // AC: the monitor should not fall asleep if g_done has been set
    if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
      status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
                                      &__kmp_wait_mx.m_mutex, &now);
      if (status != 0) {
        if (status != ETIMEDOUT && status != EINTR) {
          KMP_SYSFAIL("pthread_cond_timedwait", status);
        }
      }
    }
    status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);

    TCW_4(__kmp_global.g.g_time.dt.t_value,
          TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);

    KMP_MB(); /* Flush all pending memory write invalidates.  */
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));

#ifdef KMP_BLOCK_SIGNALS
  status = sigfillset(&new_set);
  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
  status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

  KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));

  if (__kmp_global.g.g_abort != 0) {
    /* now we need to terminate the worker threads */
    /* the value of t_abort is the signal we caught */

    int gtid;

    KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
                  __kmp_global.g.g_abort));

    /* terminate the OpenMP worker threads */
    /* TODO this is not valid for sibling threads!!
     * the uber master might not be 0 anymore.. */
    for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
      __kmp_terminate_thread(gtid);

    __kmp_cleanup();

    KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
                  __kmp_global.g.g_abort));

    if (__kmp_global.g.g_abort > 0)
      raise(__kmp_global.g.g_abort);
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));

  return thr;
}
#endif // KMP_USE_MONITOR

void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
  pthread_t handle;
  pthread_attr_t thread_attr;
  int status;

  th->th.th_info.ds.ds_gtid = gtid;

#if KMP_STATS_ENABLED
  // sets up worker thread stats
  __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);

  // th->th.th_stats is used to transfer the thread-specific stats pointer to
  // __kmp_launch_worker, so when the thread is created (and goes into
  // __kmp_launch_worker) it will set its thread-local pointer to
  // th->th.th_stats.
  if (!KMP_UBER_GTID(gtid)) {
    th->th.th_stats = __kmp_stats_list->push_back(gtid);
  } else {
    // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(),
    // so set the th->th.th_stats field to it.
    th->th.th_stats = __kmp_stats_thread_ptr;
  }
  __kmp_release_tas_lock(&__kmp_stats_lock, gtid);

#endif // KMP_STATS_ENABLED

  if (KMP_UBER_GTID(gtid)) {
    KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid));
    th->th.th_info.ds.ds_thread = pthread_self();
    __kmp_set_stack_info(gtid, th);
    __kmp_check_stack_overlap(th);
    return;
  }

  KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_init(&thread_attr);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
  }
  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null);
  }

  /* Set the stack size for this thread now.
     The multiple of 2 is there because on some machines, requesting an unusual
     stacksize causes the thread to have an offset before the dummy alloca()
     takes place to create the offset.  Since we want the user to have a
     sufficient stacksize AND support a stack offset, we alloca() twice the
     offset so that the upcoming alloca() does not eliminate any premade offset,
     and also gives the user the stack space they requested for all threads. */
  stack_size += gtid * __kmp_stkoffset * 2;
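  // For example, if __kmp_stkoffset is 8 KB, the thread with gtid 3 gets
  // 48 KB of extra stack (2 * 3 * 8 KB), leaving room for its 24 KB alloca()
  // offset applied in __kmp_launch_worker.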

  KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
                "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
                gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));

#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  status = pthread_attr_setstacksize(&thread_attr, stack_size);
#ifdef KMP_BACKUP_STKSIZE
  if (status != 0) {
    if (!__kmp_env_stksize) {
      stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
      __kmp_stksize = KMP_BACKUP_STKSIZE;
      KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
                    "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
                    "bytes\n",
                    gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
      status = pthread_attr_setstacksize(&thread_attr, stack_size);
    }
  }
#endif /* KMP_BACKUP_STKSIZE */
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
                KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null);
  }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */

#endif /* KMP_THREAD_ATTR */

  status =
      pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th);
  if (status != 0 || !handle) { // ??? Why do we check handle??
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
    if (status == EINVAL) {
      __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
                  KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null);
    }
    if (status == ENOMEM) {
      __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
                  KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null);
    }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
    if (status == EAGAIN) {
      __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status),
                  KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null);
    }
    KMP_SYSFAIL("pthread_create", status);
  }

  // Rename worker threads for improved debuggability
  if (!KMP_UBER_GTID(gtid)) {
#if defined(LIBOMP_HAVE_PTHREAD_SET_NAME_NP)
    pthread_set_name_np(handle, "openmp_worker");
#elif defined(LIBOMP_HAVE_PTHREAD_SETNAME_NP) && !KMP_OS_DARWIN
#if KMP_OS_NETBSD
    pthread_setname_np(handle, "%s", const_cast<char *>("openmp_worker"));
#else
    pthread_setname_np(handle, "openmp_worker");
#endif
#endif
  }

  th->th.th_info.ds.ds_thread = handle;

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_destroy(&thread_attr);
  if (status) {
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
              __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif /* KMP_THREAD_ATTR */

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));

} // __kmp_create_worker

#if KMP_USE_MONITOR
void __kmp_create_monitor(kmp_info_t *th) {
  pthread_t handle;
  pthread_attr_t thread_attr;
  size_t size;
  int status;
  int auto_adj_size = FALSE;

  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    // We don't need a monitor thread in the case of MAX_BLOCKTIME
    KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
                  "MAX blocktime\n"));
    th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor a no-op
    th->th.th_info.ds.ds_gtid = 0;
    return;
  }
  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
#if KMP_REAL_TIME_FIX
  TCW_4(__kmp_global.g.g_time.dt.t_value,
        -1); // Will use it for synchronization a bit later.
#else
  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
#endif // KMP_REAL_TIME_FIX

#ifdef KMP_THREAD_ATTR
  if (__kmp_monitor_stksize == 0) {
    __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
    auto_adj_size = TRUE;
  }
  status = pthread_attr_init(&thread_attr);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
  }
  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
  }

#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  status = pthread_attr_getstacksize(&thread_attr, &size);
  KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
#else
  size = __kmp_sys_min_stksize;
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
#endif /* KMP_THREAD_ATTR */

  if (__kmp_monitor_stksize == 0) {
    __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
  }
  if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
    __kmp_monitor_stksize = __kmp_sys_min_stksize;
  }

  KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes, "
                "requested stacksize = %lu bytes\n",
                size, __kmp_monitor_stksize));

retry:

/* Set stack size for this thread now. */
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes\n",
                __kmp_monitor_stksize));
  status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
  if (status != 0) {
    if (auto_adj_size) {
      __kmp_monitor_stksize *= 2;
      goto retry;
    }
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, // should this be fatal?  BB
              KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
              err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */

  status =
      pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);

  if (status != 0) {
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
    if (status == EINVAL) {
      if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
        __kmp_monitor_stksize *= 2;
        goto retry;
      }
      __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
                  KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
                  __kmp_msg_null);
    }
    if (status == ENOMEM) {
      __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
                  KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
                  __kmp_msg_null);
    }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
    if (status == EAGAIN) {
      __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
                  KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
    }
    KMP_SYSFAIL("pthread_create", status);
  }

  th->th.th_info.ds.ds_thread = handle;

#if KMP_REAL_TIME_FIX
  // Wait until the monitor thread has really started and set its *priority*.
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
                   sizeof(__kmp_global.g.g_time.dt.t_value));
  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
               &__kmp_neq_4, NULL);
#endif // KMP_REAL_TIME_FIX

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_destroy(&thread_attr);
  if (status != 0) {
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
              __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
                th->th.th_info.ds.ds_thread));

} // __kmp_create_monitor
#endif // KMP_USE_MONITOR

void __kmp_exit_thread(int exit_status) {
#if KMP_OS_WASI
// TODO: the wasm32-wasi-threads target does not yet support pthread_exit.
#else
  pthread_exit((void *)(intptr_t)exit_status);
#endif
} // __kmp_exit_thread

#if KMP_USE_MONITOR
void __kmp_resume_monitor();

extern "C" void __kmp_reap_monitor(kmp_info_t *th) {
  int status;
  void *exit_val;

  KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
                " %#.8lx\n",
                th->th.th_info.ds.ds_thread));

  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
  // If both tid and gtid are 0, it means the monitor did not ever start.
  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
  KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
  if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* First, check to see whether the monitor thread exists in order to wake it
     up. This avoids a performance problem when the monitor sleeps during a
     blocktime-sized interval. */

  status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
  if (status != ESRCH) {
    __kmp_resume_monitor(); // Wake up the monitor thread
  }
  KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
  if (exit_val != th) {
    __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
  }

  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;

  KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
                " %#.8lx\n",
                th->th.th_info.ds.ds_thread));

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}
#else
// Empty symbol to export (see exports_so.txt) when
// monitor thread feature is disabled
extern "C" void __kmp_reap_monitor(kmp_info_t *th) { (void)th; }
#endif // KMP_USE_MONITOR

void __kmp_reap_worker(kmp_info_t *th) {
  int status;
  void *exit_val;

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(
      10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid));

  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
#ifdef KMP_DEBUG
  /* Don't expose these to the user until we understand when they trigger */
  if (status != 0) {
    __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null);
  }
  if (exit_val != th) {
    KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, "
                  "exit_val = %p\n",
                  th->th.th_info.ds.ds_gtid, exit_val));
  }
#else
  (void)status; // unused variable
#endif /* KMP_DEBUG */

  KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
                th->th.th_info.ds.ds_gtid));

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}

#if KMP_HANDLE_SIGNALS

static void __kmp_null_handler(int signo) {
  // Do nothing; used for SIG_IGN-type actions.
} // __kmp_null_handler

static void __kmp_team_handler(int signo) {
  if (__kmp_global.g.g_abort == 0) {
/* Stage 1 signal handler; let's shut down all of the threads. */
#ifdef KMP_DEBUG
    __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo);
#endif
    switch (signo) {
    case SIGHUP:
    case SIGINT:
    case SIGQUIT:
    case SIGILL:
    case SIGABRT:
    case SIGFPE:
    case SIGBUS:
    case SIGSEGV:
#ifdef SIGSYS
    case SIGSYS:
#endif
    case SIGTERM:
      if (__kmp_debug_buf) {
        __kmp_dump_debug_buffer();
      }
      __kmp_unregister_library(); // cleanup shared memory
      KMP_MB(); // Flush all pending memory write invalidates.
      TCW_4(__kmp_global.g.g_abort, signo);
      KMP_MB(); // Flush all pending memory write invalidates.
      TCW_4(__kmp_global.g.g_done, TRUE);
      KMP_MB(); // Flush all pending memory write invalidates.
      break;
    default:
#ifdef KMP_DEBUG
      __kmp_debug_printf("__kmp_team_handler: unknown signal type");
#endif
      break;
    }
  }
} // __kmp_team_handler

static void __kmp_sigaction(int signum, const struct sigaction *act,
                            struct sigaction *oldact) {
  int rc = sigaction(signum, act, oldact);
  KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc);
}

static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
                                      int parallel_init) {
  KMP_MB(); // Flush all pending memory write invalidates.
  KB_TRACE(60,
           ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init));
  if (parallel_init) {
    struct sigaction new_action;
    struct sigaction old_action;
    new_action.sa_handler = handler_func;
    new_action.sa_flags = 0;
    sigfillset(&new_action.sa_mask);
    __kmp_sigaction(sig, &new_action, &old_action);
    if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) {
      sigaddset(&__kmp_sigset, sig);
    } else {
      // Restore/keep user's handler if one previously installed.
      __kmp_sigaction(sig, &old_action, NULL);
    }
  } else {
    // Save initial/system signal handlers to see if user handlers installed.
    __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]);
  }
  KMP_MB(); // Flush all pending memory write invalidates.
} // __kmp_install_one_handler

static void __kmp_remove_one_handler(int sig) {
  KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig));
  if (sigismember(&__kmp_sigset, sig)) {
    struct sigaction old;
    KMP_MB(); // Flush all pending memory write invalidates.
    __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old);
    if ((old.sa_handler != __kmp_team_handler) &&
        (old.sa_handler != __kmp_null_handler)) {
      // Restore the user's signal handler.
      KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
                    "restoring: sig=%d\n",
                    sig));
      __kmp_sigaction(sig, &old, NULL);
    }
    sigdelset(&__kmp_sigset, sig);
    KMP_MB(); // Flush all pending memory write invalidates.
  }
} // __kmp_remove_one_handler

void __kmp_install_signals(int parallel_init) {
  KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init));
  if (__kmp_handle_signals || !parallel_init) {
    // If !parallel_init, we do not install handlers; we just save the original
    // handlers. Do this even if __kmp_handle_signals is 0.
    sigemptyset(&__kmp_sigset);
    __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
    __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
#ifdef SIGSYS
    __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
#endif // SIGSYS
    __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
#ifdef SIGPIPE
    __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
#endif // SIGPIPE
  }
} // __kmp_install_signals

void __kmp_remove_signals(void) {
  int sig;
  KB_TRACE(10, ("__kmp_remove_signals()\n"));
  for (sig = 1; sig < NSIG; ++sig) {
    __kmp_remove_one_handler(sig);
  }
} // __kmp_remove_signals

#endif // KMP_HANDLE_SIGNALS

void __kmp_enable(int new_state) {
#ifdef KMP_CANCEL_THREADS
  int status, old_state;
  status = pthread_setcancelstate(new_state, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
  KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE);
#endif
}

void __kmp_disable(int *old_state) {
#ifdef KMP_CANCEL_THREADS
  int status;
  status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif
}

static void __kmp_atfork_prepare(void) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
}

static void __kmp_atfork_parent(void) {
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* Reset the library so execution in the child starts "all over again" with
   clean data structures in initial states. Don't worry about freeing memory
   allocated by the parent; just abandon it to be safe. */
static void __kmp_atfork_child(void) {
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  /* TODO make sure this is done right for nested/sibling */
  // ATT:  Memory leaks are here? TODO: Check it and fix.
  /* KMP_ASSERT( 0 ); */

  ++__kmp_fork_count;

#if KMP_AFFINITY_SUPPORTED
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
    KMP_OS_AIX
  // reset the affinity in the child to the initial thread
  // affinity in the parent
  kmp_set_thread_affinity_mask_initial();
#endif
  // Set default not to bind threads tightly in the child (we're expecting
  // over-subscription after the fork and this can improve things for
  // scripting languages that use OpenMP inside process-parallel code).
  if (__kmp_nested_proc_bind.bind_types != NULL) {
    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
  }
  for (kmp_affinity_t *affinity : __kmp_affinities)
    *affinity = KMP_AFFINITY_INIT(affinity->env_var);
  __kmp_affin_fullMask = nullptr;
  __kmp_affin_origMask = nullptr;
  __kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED

#if KMP_USE_MONITOR
  __kmp_init_monitor = 0;
#endif
  __kmp_init_parallel = FALSE;
  __kmp_init_middle = FALSE;
  __kmp_init_serial = FALSE;
  TCW_4(__kmp_init_gtid, FALSE);
  __kmp_init_common = FALSE;

  TCW_4(__kmp_init_user_locks, FALSE);
#if !KMP_USE_DYNAMIC_LOCK
  __kmp_user_lock_table.used = 1;
  __kmp_user_lock_table.allocated = 0;
  __kmp_user_lock_table.table = NULL;
  __kmp_lock_blocks = NULL;
#endif

  __kmp_all_nth = 0;
  TCW_4(__kmp_nth, 0);

  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
     here so threadprivate doesn't use stale data */
  KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
                __kmp_threadpriv_cache_list));

  while (__kmp_threadpriv_cache_list != NULL) {

    if (*__kmp_threadpriv_cache_list->addr != NULL) {
      KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
                    &(*__kmp_threadpriv_cache_list->addr)));

      *__kmp_threadpriv_cache_list->addr = NULL;
    }
    __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
  }

  __kmp_init_runtime = FALSE;

  /* reset statically initialized locks */
  __kmp_init_bootstrap_lock(&__kmp_initz_lock);
  __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_init_bootstrap_lock(&__kmp_console_lock);
  __kmp_init_bootstrap_lock(&__kmp_task_team_lock);

#if USE_ITT_BUILD
  __kmp_itt_reset(); // reset ITT's global state
#endif /* USE_ITT_BUILD */

  {
    // Child processes often get terminated without any use of OpenMP, which
    // might leave the mapped shared-memory file unattended. Thus we postpone
    // library registration until middle initialization in the child process.
    __kmp_need_register_serial = FALSE;
    __kmp_serial_initialize();
  }

  /* This is necessary to make sure no stale data is left around */
  /* AC: customers complain that we use unsafe routines in the atfork
     handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
     in dynamic_link when checking for the presence of the shared tbbmalloc
     library. The suggestion is to make the library initialization lazier,
     similar to what is done for __kmpc_begin(). */
  // TODO: synchronize all static initializations with regular library
  //       startup; look at kmp_global.cpp and etc.
  //__kmp_internal_begin ();
}

void __kmp_register_atfork(void) {
  if (__kmp_need_register_atfork) {
#if !KMP_OS_WASI
    int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
                                __kmp_atfork_child);
    KMP_CHECK_SYSFAIL("pthread_atfork", status);
#endif
    __kmp_need_register_atfork = FALSE;
  }
}

void __kmp_suspend_initialize(void) {
  int status;
  status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
  status = pthread_condattr_init(&__kmp_suspend_cond_attr);
  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
}

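// th_suspend_init_count encodes a small state machine: __kmp_fork_count + 1
// means the suspend condition variable and mutex are initialized for this
// process instance, -1 means another thread is initializing them right now,
// and any smaller value means they still need (re)initialization (e.g. after
// a fork()).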
void __kmp_suspend_initialize_thread(kmp_info_t *th) {
  int old_value = KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count);
  int new_value = __kmp_fork_count + 1;
  // Return if already initialized
  if (old_value == new_value)
    return;
  // Wait, then return if being initialized
  if (old_value == -1 || !__kmp_atomic_compare_store(
                             &th->th.th_suspend_init_count, old_value, -1)) {
    while (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) != new_value) {
      KMP_CPU_PAUSE();
    }
  } else {
    // Claim to be the initializer and do initializations
    int status;
    status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
                               &__kmp_suspend_cond_attr);
    KMP_CHECK_SYSFAIL("pthread_cond_init", status);
    status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
                                &__kmp_suspend_mutex_attr);
    KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
    KMP_ATOMIC_ST_REL(&th->th.th_suspend_init_count, new_value);
  }
}

void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
  if (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) > __kmp_fork_count) {
    /* this means we have initialized the suspension pthread objects for this
       thread in this instance of the process */
    int status;

    status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
    if (status != 0 && status != EBUSY) {
      KMP_SYSFAIL("pthread_cond_destroy", status);
    }
    status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
    if (status != 0 && status != EBUSY) {
      KMP_SYSFAIL("pthread_mutex_destroy", status);
    }
    --th->th.th_suspend_init_count;
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count) ==
                     __kmp_fork_count);
  }
}

// return true if lock obtained, false otherwise
int __kmp_try_suspend_mx(kmp_info_t *th) {
  return (pthread_mutex_trylock(&th->th.th_suspend_mx.m_mutex) == 0);
}

void __kmp_lock_suspend_mx(kmp_info_t *th) {
  int status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
}

void __kmp_unlock_suspend_mx(kmp_info_t *th) {
  int status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
}

/* This routine puts the calling thread to sleep after setting the
   sleep bit for the indicated flag variable to true. */
template <class C>
static inline void __kmp_suspend_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
  kmp_info_t *th = __kmp_threads[th_gtid];
  int status;
  typename C::flag_t old_spin;

  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  __kmp_suspend_initialize_thread(th);

  __kmp_lock_suspend_mx(th);

  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
                th_gtid, flag->get()));

  /* TODO: shouldn't this use release semantics to ensure that
     __kmp_suspend_initialize_thread gets called first? */
  old_spin = flag->set_sleeping();
  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
  th->th.th_sleep_loc_type = flag->get_type();
1508   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1509       __kmp_pause_status != kmp_soft_paused) {
1510     flag->unset_sleeping();
1511     TCW_PTR(th->th.th_sleep_loc, NULL);
1512     th->th.th_sleep_loc_type = flag_unset;
1513     __kmp_unlock_suspend_mx(th);
1514     return;
1515   }
1516   KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
1517                " was %x\n",
1518                th_gtid, flag->get(), flag->load(), old_spin));
1519 
1520   if (flag->done_check_val(old_spin) || flag->done_check()) {
1521     flag->unset_sleeping();
1522     TCW_PTR(th->th.th_sleep_loc, NULL);
1523     th->th.th_sleep_loc_type = flag_unset;
1524     KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
1525                  "for spin(%p)\n",
1526                  th_gtid, flag->get()));
1527   } else {
1528     /* Encapsulate in a loop as the documentation states that this may
1529        "with low probability" return when the condition variable has
1530        not been signaled or broadcast */
1531     int deactivated = FALSE;
1532 
1533     while (flag->is_sleeping()) {
1534 #ifdef DEBUG_SUSPEND
1535       char buffer[128];
1536       __kmp_suspend_count++;
1537       __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1538       __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
1539                    buffer);
1540 #endif
1541       // Mark the thread as no longer active (only in the first iteration of the
1542       // loop).
1543       if (!deactivated) {
1544         th->th.th_active = FALSE;
1545         if (th->th.th_active_in_pool) {
1546           th->th.th_active_in_pool = FALSE;
1547           KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
1548           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
1549         }
1550         deactivated = TRUE;
1551       }
1552 
1553       KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
1554       KMP_DEBUG_ASSERT(flag->get_type() == th->th.th_sleep_loc_type);
1555 
1556 #if USE_SUSPEND_TIMEOUT
1557       struct timespec now;
1558       struct timeval tval;
1559       int msecs;
1560 
1561       status = gettimeofday(&tval, NULL);
1562       KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1563       TIMEVAL_TO_TIMESPEC(&tval, &now);
1564 
1565       msecs = (4 * __kmp_dflt_blocktime) + 200;
1566       now.tv_sec += msecs / 1000;
1567       now.tv_nsec += (msecs % 1000) * 1000000; // Convert ms remainder to ns.
1568 
1569       KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
1570                     "pthread_cond_timedwait\n",
1571                     th_gtid));
1572       status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
1573                                       &th->th.th_suspend_mx.m_mutex, &now);
1574 #else
1575       KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
1576                     " pthread_cond_wait\n",
1577                     th_gtid));
1578       status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
1579                                  &th->th.th_suspend_mx.m_mutex);
1580 #endif // USE_SUSPEND_TIMEOUT
1581 
1582       if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
1583         KMP_SYSFAIL("pthread_cond_wait", status);
1584       }
1585 
1586       KMP_DEBUG_ASSERT(flag->get_type() == flag->get_ptr_type());
1587 
1588       if (!flag->is_sleeping() &&
1589           ((status == EINTR) || (status == ETIMEDOUT))) {
1590         // if interrupt or timeout, and thread is no longer sleeping, we need to
1591         // make sure sleep_loc gets reset; however, this shouldn't be needed if
1592         // we woke up with resume
1593         flag->unset_sleeping();
1594         TCW_PTR(th->th.th_sleep_loc, NULL);
1595         th->th.th_sleep_loc_type = flag_unset;
1596       }
1597 #ifdef KMP_DEBUG
1598       if (status == ETIMEDOUT) {
1599         if (flag->is_sleeping()) {
1600           KF_TRACE(100,
1601                    ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
1602         } else {
1603           KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
1604                        "not set!\n",
1605                        th_gtid));
1606           TCW_PTR(th->th.th_sleep_loc, NULL);
1607           th->th.th_sleep_loc_type = flag_unset;
1608         }
1609       } else if (flag->is_sleeping()) {
1610         KF_TRACE(100,
1611                  ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
1612       }
1613 #endif
1614     } // while
1615 
1616     // Mark the thread as active again (if it was previously marked inactive)
1617     if (deactivated) {
1618       th->th.th_active = TRUE;
1619       if (TCR_4(th->th.th_in_pool)) {
1620         KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
1621         th->th.th_active_in_pool = TRUE;
1622       }
1623     }
1624   }
1625   // th_sleep_loc may have been set above even if the wait loop was never
1626   // entered, so we need to reset it here.
1627   TCW_PTR(th->th.th_sleep_loc, NULL);
1628   th->th.th_sleep_loc_type = flag_unset;
1629 
1630   KMP_DEBUG_ASSERT(!flag->is_sleeping());
1631   KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
1632 #ifdef DEBUG_SUSPEND
1633   {
1634     char buffer[128];
1635     __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1636     __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
1637                  buffer);
1638   }
1639 #endif
1640 
1641   __kmp_unlock_suspend_mx(th);
1642   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
1643 }
1644 
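// The wait above follows the canonical mutex/condvar/predicate pattern; a
// minimal sketch of that pattern (hypothetical names, error checks omitted):
//
//   pthread_mutex_lock(&mx);
//   while (!predicate)               // re-check: wakeups may be spurious
//     pthread_cond_wait(&cv, &mx);   // atomically releases mx while waiting
//   pthread_mutex_unlock(&mx);
//
// The waker sets `predicate` under the same mutex and then calls
// pthread_cond_signal(&cv), which is what __kmp_resume_template() below does.
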
1645 template <bool C, bool S>
1646 void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
1647   __kmp_suspend_template(th_gtid, flag);
1648 }
1649 template <bool C, bool S>
1650 void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
1651   __kmp_suspend_template(th_gtid, flag);
1652 }
1653 template <bool C, bool S>
1654 void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
1655   __kmp_suspend_template(th_gtid, flag);
1656 }
1657 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
1658   __kmp_suspend_template(th_gtid, flag);
1659 }
1660 
1661 template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
1662 template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
1663 template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
1664 template void
1665 __kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
1666 template void
1667 __kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
1668 
1669 /* This routine signals the thread specified by target_gtid to wake up
1670    after setting the sleep bit indicated by the flag argument to FALSE.
1671    The target thread must already have called __kmp_suspend_template() */
1672 template <class C>
1673 static inline void __kmp_resume_template(int target_gtid, C *flag) {
1674   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1675   kmp_info_t *th = __kmp_threads[target_gtid];
1676   int status;
1677 
1678 #ifdef KMP_DEBUG
1679   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1680 #endif
1681 
1682   KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
1683                 gtid, target_gtid));
1684   KMP_DEBUG_ASSERT(gtid != target_gtid);
1685 
1686   __kmp_suspend_initialize_thread(th);
1687 
1688   __kmp_lock_suspend_mx(th);
1689 
1690   if (!flag || flag != th->th.th_sleep_loc) {
1691     // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
1692     // different location; wake up at new location
1693     flag = (C *)CCAST(void *, th->th.th_sleep_loc);
1694   }
1695 
1696   // First, check if the flag is null or its type has changed. If so, someone
1697   // else woke it up.
1698   if (!flag) { // Thread doesn't appear to be sleeping on anything
1699     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1700                  "awake: flag(%p)\n",
1701                  gtid, target_gtid, (void *)NULL));
1702     __kmp_unlock_suspend_mx(th);
1703     return;
1704   } else if (flag->get_type() != th->th.th_sleep_loc_type) {
1705     // Flag type does not appear to match this function template; possibly the
1706     // thread is sleeping on something else. Try null resume again.
1707     KF_TRACE(
1708         5,
1709         ("__kmp_resume_template: T#%d retrying, thread T#%d Mismatch flag(%p), "
1710          "spin(%p) type=%d ptr_type=%d\n",
1711          gtid, target_gtid, flag, flag->get(), flag->get_type(),
1712          th->th.th_sleep_loc_type));
1713     __kmp_unlock_suspend_mx(th);
1714     __kmp_null_resume_wrapper(th);
1715     return;
1716   } else { // if multiple threads are sleeping, flag should be internally
1717     // referring to a specific thread here
1718     if (!flag->is_sleeping()) {
1719       KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1720                    "awake: flag(%p): %u\n",
1721                    gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
1722       __kmp_unlock_suspend_mx(th);
1723       return;
1724     }
1725   }
1726   KMP_DEBUG_ASSERT(flag);
1727   flag->unset_sleeping();
1728   TCW_PTR(th->th.th_sleep_loc, NULL);
1729   th->th.th_sleep_loc_type = flag_unset;
1730 
1731   KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
1732                "sleep bit for flag's loc(%p): %u\n",
1733                gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
1734 
1735 #ifdef DEBUG_SUSPEND
1736   {
1737     char buffer[128];
1738     __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1739     __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
1740                  target_gtid, buffer);
1741   }
1742 #endif
1743   status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
1744   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1745   __kmp_unlock_suspend_mx(th);
1746   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
1747                 " for T#%d\n",
1748                 gtid, target_gtid));
1749 }
1750 
1751 template <bool C, bool S>
1752 void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
1753   __kmp_resume_template(target_gtid, flag);
1754 }
1755 template <bool C, bool S>
1756 void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
1757   __kmp_resume_template(target_gtid, flag);
1758 }
1759 template <bool C, bool S>
1760 void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
1761   __kmp_resume_template(target_gtid, flag);
1762 }
1763 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
1764   __kmp_resume_template(target_gtid, flag);
1765 }
1766 
1767 template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
1768 template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
1769 template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
1770 template void
1771 __kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
1772 
1773 #if KMP_USE_MONITOR
1774 void __kmp_resume_monitor() {
1775   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1776   int status;
1777 #ifdef KMP_DEBUG
1778   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1779   KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
1780                 KMP_GTID_MONITOR));
1781   KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
1782 #endif
1783   status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
1784   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1785 #ifdef DEBUG_SUSPEND
1786   {
1787     char buffer[128];
1788     __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
1789     __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
1790                  KMP_GTID_MONITOR, buffer);
1791   }
1792 #endif
1793   status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
1794   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1795   status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
1796   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1797   KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
1798                 " for T#%d\n",
1799                 gtid, KMP_GTID_MONITOR));
1800 }
1801 #endif // KMP_USE_MONITOR
1802 
1803 void __kmp_yield() { sched_yield(); }
1804 
1805 void __kmp_gtid_set_specific(int gtid) {
1806   if (__kmp_init_gtid) {
1807     int status;
1808     status = pthread_setspecific(__kmp_gtid_threadprivate_key,
1809                                  (void *)(intptr_t)(gtid + 1));
1810     KMP_CHECK_SYSFAIL("pthread_setspecific", status);
1811   } else {
1812     KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
1813   }
1814 }
1815 
1816 int __kmp_gtid_get_specific() {
1817   int gtid;
1818   if (!__kmp_init_gtid) {
1819     KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
1820                   "KMP_GTID_SHUTDOWN\n"));
1821     return KMP_GTID_SHUTDOWN;
1822   }
1823   gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
1824   if (gtid == 0) {
1825     gtid = KMP_GTID_DNE;
1826   } else {
1827     gtid--;
1828   }
1829   KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
1830                 __kmp_gtid_threadprivate_key, gtid));
1831   return gtid;
1832 }
1833 
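// The gtid is stored biased by +1 so that a never-set key, for which
// pthread_getspecific() returns NULL (i.e. 0), is distinguishable from a
// valid gtid of 0. For example:
//
//   __kmp_gtid_set_specific(0); // stores (void *)1
//   __kmp_gtid_get_specific();  // reads 1, returns 1 - 1 = 0
//   // With no prior set: reads 0 (NULL), so KMP_GTID_DNE is returned.
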
1834 double __kmp_read_cpu_time(void) {
1835   /*clock_t   t;*/
1836   struct tms buffer;
1837 
1838   /*t =*/times(&buffer);
1839 
1840   return (double)(buffer.tms_utime + buffer.tms_cutime) /
1841          (double)CLOCKS_PER_SEC;
1842 }
1843 
1844 int __kmp_read_system_info(struct kmp_sys_info *info) {
1845   int status;
1846   struct rusage r_usage;
1847 
1848   memset(info, 0, sizeof(*info));
1849 
1850   status = getrusage(RUSAGE_SELF, &r_usage);
1851   KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
1852 
1853 #if !KMP_OS_WASI
1854   // The maximum resident set size utilized (in kilobytes)
1855   info->maxrss = r_usage.ru_maxrss;
1856   // The number of page faults serviced without any I/O
1857   info->minflt = r_usage.ru_minflt;
1858   // The number of page faults serviced that required I/O
1859   info->majflt = r_usage.ru_majflt;
1860   // The number of times a process was "swapped" out of memory
1861   info->nswap = r_usage.ru_nswap;
1862   // The number of times the file system had to perform input
1863   info->inblock = r_usage.ru_inblock;
1864   // The number of times the file system had to perform output
1865   info->oublock = r_usage.ru_oublock;
1866   // The number of times a context switch was voluntarily
1867   info->nvcsw = r_usage.ru_nvcsw;
1868   // The number of times a context switch was forced
1869   info->nivcsw = r_usage.ru_nivcsw;
1870 #endif
1871 
1872   return (status != 0);
1873 }
1874 
1875 void __kmp_read_system_time(double *delta) {
1876   double t_ns;
1877   struct timeval tval;
1878   struct timespec stop;
1879   int status;
1880 
1881   status = gettimeofday(&tval, NULL);
1882   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1883   TIMEVAL_TO_TIMESPEC(&tval, &stop);
1884   t_ns = (double)(TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start));
1885   *delta = (t_ns * 1e-9);
1886 }
1887 
1888 void __kmp_clear_system_time(void) {
1889   struct timeval tval;
1890   int status;
1891   status = gettimeofday(&tval, NULL);
1892   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1893   TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
1894 }
1895 
1896 static int __kmp_get_xproc(void) {
1897 
1898   int r = 0;
1899 
1900 #if KMP_OS_LINUX
1901 
1902   __kmp_type_convert(sysconf(_SC_NPROCESSORS_CONF), &(r));
1903 
1904 #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD || \
1905     KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
1906 
1907   __kmp_type_convert(sysconf(_SC_NPROCESSORS_ONLN), &(r));
1908 
1909 #elif KMP_OS_DARWIN
1910 
1911   size_t len = sizeof(r);
1912   sysctlbyname("hw.logicalcpu", &r, &len, NULL, 0);
1913 
1914 #else
1915 
1916 #error "Unknown or unsupported OS."
1917 
1918 #endif
1919 
1920   return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
1921 
1922 } // __kmp_get_xproc
1923 
1924 int __kmp_read_from_file(char const *path, char const *format, ...) {
1925   int result;
1926   va_list args;
1927 
1928   va_start(args, format);
1929   FILE *f = fopen(path, "rb");
1930   if (f == NULL) {
1931     va_end(args);
1932     return 0;
1933   }
1934   result = vfscanf(f, format, args);
1935   fclose(f);
1936   va_end(args);
1937 
1938   return result;
1939 }
1940 
1941 void __kmp_runtime_initialize(void) {
1942   int status;
1943   pthread_mutexattr_t mutex_attr;
1944   pthread_condattr_t cond_attr;
1945 
1946   if (__kmp_init_runtime) {
1947     return;
1948   }
1949 
1950 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1951   if (!__kmp_cpuinfo.initialized) {
1952     __kmp_query_cpuid(&__kmp_cpuinfo);
1953   }
1954 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1955 
1956   __kmp_xproc = __kmp_get_xproc();
1957 
1958 #if !KMP_32_BIT_ARCH
1959   struct rlimit rlim;
1960   // read stack size of calling thread, save it as default for worker threads;
1961   // this should be done before reading environment variables
1962   status = getrlimit(RLIMIT_STACK, &rlim);
1963   if (status == 0) { // success?
1964     __kmp_stksize = rlim.rlim_cur;
1965     __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
1966   }
1967 #endif /* KMP_32_BIT_ARCH */
1968 
1969   if (sysconf(_SC_THREADS)) {
1970 
1971     /* Query the maximum number of threads */
1972     __kmp_type_convert(sysconf(_SC_THREAD_THREADS_MAX), &(__kmp_sys_max_nth));
1973 #ifdef __ve__
1974     if (__kmp_sys_max_nth == -1) {
1975       // VE's pthread supports only up to 64 threads per VE process.
1976       // So we use KMP_MAX_NTH (predefined as 64) here.
1977       __kmp_sys_max_nth = KMP_MAX_NTH;
1978     }
1979 #else
1980     if (__kmp_sys_max_nth == -1) {
1981       /* Unlimited threads for NPTL */
1982       __kmp_sys_max_nth = INT_MAX;
1983     } else if (__kmp_sys_max_nth <= 1) {
1984       /* Can't tell, just use PTHREAD_THREADS_MAX */
1985       __kmp_sys_max_nth = KMP_MAX_NTH;
1986     }
1987 #endif
1988 
1989     /* Query the minimum stack size */
1990     __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
1991     if (__kmp_sys_min_stksize <= 1) {
1992       __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
1993     }
1994   }
1995 
1996   /* Set up minimum number of threads to switch to TLS gtid */
1997   __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
1998 
1999   status = pthread_key_create(&__kmp_gtid_threadprivate_key,
2000                               __kmp_internal_end_dest);
2001   KMP_CHECK_SYSFAIL("pthread_key_create", status);
2002   status = pthread_mutexattr_init(&mutex_attr);
2003   KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
2004   status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
2005   KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2006   status = pthread_mutexattr_destroy(&mutex_attr);
2007   KMP_CHECK_SYSFAIL("pthread_mutexattr_destroy", status);
2008   status = pthread_condattr_init(&cond_attr);
2009   KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
2010   status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
2011   KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2012   status = pthread_condattr_destroy(&cond_attr);
2013   KMP_CHECK_SYSFAIL("pthread_condattr_destroy", status);
2014 #if USE_ITT_BUILD
2015   __kmp_itt_initialize();
2016 #endif /* USE_ITT_BUILD */
2017 
2018   __kmp_init_runtime = TRUE;
2019 }
2020 
2021 void __kmp_runtime_destroy(void) {
2022   int status;
2023 
2024   if (!__kmp_init_runtime) {
2025     return; // Nothing to do.
2026   }
2027 
2028 #if USE_ITT_BUILD
2029   __kmp_itt_destroy();
2030 #endif /* USE_ITT_BUILD */
2031 
2032   status = pthread_key_delete(__kmp_gtid_threadprivate_key);
2033   KMP_CHECK_SYSFAIL("pthread_key_delete", status);
2034 
2035   status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
2036   if (status != 0 && status != EBUSY) {
2037     KMP_SYSFAIL("pthread_mutex_destroy", status);
2038   }
2039   status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
2040   if (status != 0 && status != EBUSY) {
2041     KMP_SYSFAIL("pthread_cond_destroy", status);
2042   }
2043 #if KMP_AFFINITY_SUPPORTED
2044   __kmp_affinity_uninitialize();
2045 #endif
2046 
2047   __kmp_init_runtime = FALSE;
2048 }
2049 
2050 /* Put the thread to sleep for a time period */
2051 /* NOTE: not currently used anywhere */
2052 void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); }
2053 
2054 /* Calculate the elapsed wall clock time for the user */
2055 void __kmp_elapsed(double *t) {
2056   int status;
2057 #ifdef FIX_SGI_CLOCK
2058   struct timespec ts;
2059 
2060   status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
2061   KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
2062   *t =
2063       (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
2064 #else
2065   struct timeval tv;
2066 
2067   status = gettimeofday(&tv, NULL);
2068   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
2069   *t =
2070       (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
2071 #endif
2072 }
2073 
2074 /* Calculate the elapsed wall clock tick for the user */
2075 void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
2076 
2077 /* Return the current time stamp in nsec */
2078 kmp_uint64 __kmp_now_nsec() {
2079   struct timeval t;
2080   gettimeofday(&t, NULL);
2081   kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
2082                     (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
2083   return nsec;
2084 }
2085 
2086 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2087 /* Measure clock ticks per millisecond */
2088 void __kmp_initialize_system_tick() {
2089   kmp_uint64 now, nsec2, diff;
2090   kmp_uint64 delay = 1000000; // ~450 usec on most machines.
2091   kmp_uint64 nsec = __kmp_now_nsec();
2092   kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
2093   while ((now = __kmp_hardware_timestamp()) < goal)
2094     ;
2095   nsec2 = __kmp_now_nsec();
2096   diff = nsec2 - nsec;
2097   if (diff > 0) {
2098     double tpus = 1000.0 * (double)(delay + (now - goal)) / (double)diff;
2099     if (tpus > 0.0) {
2100       __kmp_ticks_per_msec = (kmp_uint64)(tpus * 1000.0);
2101       __kmp_ticks_per_usec = (kmp_uint64)tpus;
2102     }
2103   }
2104 }
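// Worked example with hypothetical numbers: for delay = 1,000,000 ticks, an
// overshoot of now - goal = 5,000 ticks, and a measured diff of 402,000 ns,
//   tpus = 1000.0 * 1,005,000 / 402,000 = 2500 ticks per usec,
// i.e. a 2.5 GHz timestamp counter, so __kmp_ticks_per_msec = 2,500,000.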
2105 #endif
2106 
2107 /* Determine whether the given address is mapped into the current address
2108    space. */
2109 
2110 int __kmp_is_address_mapped(void *addr) {
2111 
2112   int found = 0;
2113   int rc;
2114 
2115 #if KMP_OS_LINUX || KMP_OS_HURD
2116 
2117   /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the
2118      address ranges mapped into the address space. */
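  // A typical line in that file looks like (example):
  //   7f2c4a100000-7f2c4a300000 rw-p 00000000 00:00 0
  // from which the begin/end addresses and the permission string are parsed.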
2119 
2120   char *name = __kmp_str_format("/proc/%d/maps", getpid());
2121   FILE *file = NULL;
2122 
2123   file = fopen(name, "r");
2124   KMP_ASSERT(file != NULL);
2125 
2126   for (;;) {
2127 
2128     void *beginning = NULL;
2129     void *ending = NULL;
2130     char perms[5];
2131 
2132     rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
2133     if (rc == EOF) {
2134       break;
2135     }
2136     KMP_ASSERT(rc == 3 &&
2137                KMP_STRLEN(perms) == 4); // Make sure all fields are read.
2138 
2139     // Ending address is not included in the region, but beginning is.
2140     if ((addr >= beginning) && (addr < ending)) {
2141       perms[2] = 0; // The 3rd and 4th characters do not matter.
2142       if (strcmp(perms, "rw") == 0) {
2143         // Memory we are looking for should be readable and writable.
2144         found = 1;
2145       }
2146       break;
2147     }
2148   }
2149 
2150   // Free resources.
2151   fclose(file);
2152   KMP_INTERNAL_FREE(name);
2153 #elif KMP_OS_FREEBSD
2154   char *buf;
2155   size_t lstsz;
2156   int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
2157   rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
2158   if (rc < 0)
2159     return 0;
2160   // Convert from the number of vm entries to the byte size of the whole
2161   // entry map list, with one-third headroom in case the map grows meanwhile.
2162   lstsz = lstsz * 4 / 3;
2163   buf = reinterpret_cast<char *>(KMP_INTERNAL_MALLOC(lstsz));
2164   rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
2165   if (rc < 0) {
2166     KMP_INTERNAL_FREE(buf);
2167     return 0;
2168   }
2169 
2170   char *lw = buf;
2171   char *up = buf + lstsz;
2172 
2173   while (lw < up) {
2174     struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
2175     size_t cursz = cur->kve_structsize;
2176     if (cursz == 0)
2177       break;
2178     void *start = reinterpret_cast<void *>(cur->kve_start);
2179     void *end = reinterpret_cast<void *>(cur->kve_end);
2180     // Readable/Writable addresses within current map entry
2181     if ((addr >= start) && (addr < end)) {
2182       if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
2183           (cur->kve_protection & KVME_PROT_WRITE) != 0) {
2184         found = 1;
2185         break;
2186       }
2187     }
2188     lw += cursz;
2189   }
2190   KMP_INTERNAL_FREE(buf);
2191 #elif KMP_OS_DRAGONFLY
2192   char err[_POSIX2_LINE_MAX];
2193   kinfo_proc *proc;
2194   vmspace sp;
2195   vm_map *cur;
2196   vm_map_entry entry, *c;
2197   struct proc p;
2198   kvm_t *fd;
2199   uintptr_t uaddr;
2200   int num;
2201 
2202   fd = kvm_openfiles(nullptr, nullptr, nullptr, O_RDONLY, err);
2203   if (!fd) {
2204     return 0;
2205   }
2206 
2207   proc = kvm_getprocs(fd, KERN_PROC_PID, getpid(), &num);
2208 
2209   if (kvm_read(fd, static_cast<uintptr_t>(proc->kp_paddr), &p, sizeof(p)) !=
2210           sizeof(p) ||
2211       kvm_read(fd, reinterpret_cast<uintptr_t>(p.p_vmspace), &sp, sizeof(sp)) !=
2212           sizeof(sp)) {
2213     kvm_close(fd);
2214     return 0;
2215   }
2216 
2217   (void)rc;
2218   cur = &sp.vm_map;
2219   uaddr = reinterpret_cast<uintptr_t>(addr);
2220   for (c = kvm_vm_map_entry_first(fd, cur, &entry); c;
2221        c = kvm_vm_map_entry_next(fd, c, &entry)) {
2222     if ((uaddr >= entry.ba.start) && (uaddr <= entry.ba.end)) {
2223       if ((entry.protection & VM_PROT_READ) != 0 &&
2224           (entry.protection & VM_PROT_WRITE) != 0) {
2225         found = 1;
2226         break;
2227       }
2228     }
2229   }
2230 
2231   kvm_close(fd);
2232 #elif KMP_OS_SOLARIS
2233   prmap_t *cur, *map;
2234   void *buf;
2235   uintptr_t uaddr;
2236   ssize_t rd;
2237   int err;
2238   int file;
2239 
2240   pid_t pid = getpid();
2241   struct ps_prochandle *fd = Pgrab(pid, PGRAB_RDONLY, &err);
2242
2243 
2244   if (!fd) {
2245     return 0;
2246   }
2247 
2248   char *name = __kmp_str_format("/proc/%d/map", pid);
2249   size_t sz = (1 << 20);
2250   file = open(name, O_RDONLY);
2251   if (file == -1) {
2252     KMP_INTERNAL_FREE(name);
2253     return 0;
2254   }
2255 
2256   buf = KMP_INTERNAL_MALLOC(sz);
2257 
2258   while (sz > 0 && (rd = pread(file, buf, sz, 0)) == sz) {
2259     void *newbuf;
2260     sz <<= 1;
2261     newbuf = KMP_INTERNAL_REALLOC(buf, sz);
2262     buf = newbuf;
2263   }
2264 
2265   map = reinterpret_cast<prmap_t *>(buf);
2266   uaddr = reinterpret_cast<uintptr_t>(addr);
2267 
2268   for (cur = map; rd > 0; cur++, rd -= sizeof(*map)) {
2269     if ((uaddr >= cur->pr_vaddr) && (uaddr < cur->pr_vaddr + cur->pr_size)) {
2270       if ((cur->pr_mflags & MA_READ) != 0 && (cur->pr_mflags & MA_WRITE) != 0) {
2271         found = 1;
2272         break;
2273       }
2274     }
2275   }
2276 
2277   KMP_INTERNAL_FREE(map);
2278   close(file);
2279   KMP_INTERNAL_FREE(name);
2280 #elif KMP_OS_DARWIN
2281 
2282   /* On OS X*, the /proc pseudo filesystem is not available. Try to read
2283      memory using the vm interface. */
2284 
2285   int buffer;
2286   vm_size_t count;
2287   rc = vm_read_overwrite(
2288       mach_task_self(), // Task to read memory of.
2289       (vm_address_t)(addr), // Address to read from.
2290       1, // Number of bytes to be read.
2291       (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
2292       &count // Address of var to save number of read bytes in.
2293   );
2294   if (rc == 0) {
2295     // Memory successfully read.
2296     found = 1;
2297   }
2298 
2299 #elif KMP_OS_NETBSD
2300 
2301   int mib[5];
2302   mib[0] = CTL_VM;
2303   mib[1] = VM_PROC;
2304   mib[2] = VM_PROC_MAP;
2305   mib[3] = getpid();
2306   mib[4] = sizeof(struct kinfo_vmentry);
2307 
2308   size_t size;
2309   rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
2310   KMP_ASSERT(!rc);
2311   KMP_ASSERT(size);
2312 
2313   size = size * 4 / 3;
2314   struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
2315   KMP_ASSERT(kiv);
2316 
2317   rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
2318   KMP_ASSERT(!rc);
2319   KMP_ASSERT(size);
2320 
2321   for (size_t i = 0; i < size / sizeof(*kiv); i++) {
2322     if (kiv[i].kve_start <= (uint64_t)addr &&
2323         (uint64_t)addr < kiv[i].kve_end) {
2324       found = 1;
2325       break;
2326     }
2327   }
2328   KMP_INTERNAL_FREE(kiv);
2329 #elif KMP_OS_OPENBSD
2330 
2331   int mib[3];
2332   mib[0] = CTL_KERN;
2333   mib[1] = KERN_PROC_VMMAP;
2334   mib[2] = getpid();
2335 
2336   size_t size;
2337   uint64_t end;
2338   rc = sysctl(mib, 3, NULL, &size, NULL, 0);
2339   KMP_ASSERT(!rc);
2340   KMP_ASSERT(size);
2341   end = size;
2342 
2343   struct kinfo_vmentry kiv = {.kve_start = 0};
2344 
2345   while ((rc = sysctl(mib, 3, &kiv, &size, NULL, 0)) == 0) {
2346     KMP_ASSERT(size);
2347     if (kiv.kve_end == end)
2348       break;
2349 
2350     if (kiv.kve_start <= (uint64_t)addr && (uint64_t)addr < kiv.kve_end) {
2351       found = 1;
2352       break;
2353     }
2354     kiv.kve_start += 1;
2355   }
2356 #elif KMP_OS_WASI
2357   found = (uintptr_t)addr < (__builtin_wasm_memory_size(0) * PAGESIZE);
2358 #elif KMP_OS_AIX
2359 
2360   uint32_t loadQueryBufSize = 4096u; // Default loadquery buffer size.
2361   char *loadQueryBuf;
2362 
2363   for (;;) {
2364     loadQueryBuf = (char *)KMP_INTERNAL_MALLOC(loadQueryBufSize);
2365     if (loadQueryBuf == NULL) {
2366       return 0;
2367     }
2368 
2369     rc = loadquery(L_GETXINFO | L_IGNOREUNLOAD, loadQueryBuf, loadQueryBufSize);
2370     if (rc < 0) {
2371       KMP_INTERNAL_FREE(loadQueryBuf);
2372       if (errno != ENOMEM) {
2373         return 0;
2374       }
2375       // errno == ENOMEM; double the size.
2376       loadQueryBufSize <<= 1;
2377       continue;
2378     }
2379     // Obtained the load info successfully.
2380     break;
2381   }
2382 
2383   struct ld_xinfo *curLdInfo = (struct ld_xinfo *)loadQueryBuf;
2384 
2385   // Loop through the load info to find if there is a match.
2386   for (;;) {
2387     uintptr_t curDataStart = (uintptr_t)curLdInfo->ldinfo_dataorg;
2388     uintptr_t curDataEnd = curDataStart + curLdInfo->ldinfo_datasize;
2389 
2390     // The data segment is readable and writable.
2391     if (curDataStart <= (uintptr_t)addr && (uintptr_t)addr < curDataEnd) {
2392       found = 1;
2393       break;
2394     }
2395     if (curLdInfo->ldinfo_next == 0u) {
2396       // Reached the end of load info.
2397       break;
2398     }
2399     curLdInfo = (struct ld_xinfo *)((char *)curLdInfo + curLdInfo->ldinfo_next);
2400   }
2401   KMP_INTERNAL_FREE(loadQueryBuf);
2402 
2403 #else
2404 
2405 #error "Unknown or unsupported OS"
2406 
2407 #endif
2408 
2409   return found;
2410 
2411 } // __kmp_is_address_mapped
2412 
2413 #ifdef USE_LOAD_BALANCE
2414 
2415 #if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||    \
2416     KMP_OS_OPENBSD || KMP_OS_SOLARIS
2417 
2418 // The function returns the rounded value of the system load average
2419 // during the given time interval, which depends on the value of the
2420 // __kmp_load_balance_interval variable (default is 60 sec; other values
2421 // may be 300 sec or 900 sec).
2422 // It returns -1 in case of error.
2423 int __kmp_get_load_balance(int max) {
2424   double averages[3];
2425   int ret_avg = 0;
2426 
2427   int res = getloadavg(averages, 3);
2428 
2429   // Check __kmp_load_balance_interval to determine which of the averages to
2430   // use. Note that getloadavg() may return fewer samples than requested,
2431   // i.e. fewer than 3.
2432   if (__kmp_load_balance_interval < 180 && (res >= 1)) {
2433     ret_avg = (int)averages[0]; // 1 min
2434   } else if ((__kmp_load_balance_interval >= 180 &&
2435               __kmp_load_balance_interval < 600) &&
2436              (res >= 2)) {
2437     ret_avg = (int)averages[1]; // 5 min
2438   } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
2439     ret_avg = (int)averages[2]; // 15 min
2440   } else { // Error occurred
2441     return -1;
2442   }
2443 
2444   return ret_avg;
2445 }
2446 
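// For example, with the default __kmp_load_balance_interval of 60 sec this
// returns the truncated 1-minute average (averages[0]); an interval of 300
// selects the 5-minute average, and 900 selects the 15-minute average.
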
2447 #elif KMP_OS_AIX
2448 
2449 // The function returns number of running (not sleeping) threads, or -1 in case
2450 // of error.
2451 int __kmp_get_load_balance(int max) {
2452 
2453   static int glb_running_threads = 0; // Saved count of the running threads for
2454                                       // the thread balance algorithm.
2455   static double glb_call_time = 0; // Thread balance algorithm call time.
2456   int running_threads = 0; // Number of running threads in the system.
2457 
2458   double call_time = 0.0;
2459 
2460   __kmp_elapsed(&call_time);
2461 
2462   if (glb_call_time &&
2463       (call_time - glb_call_time < __kmp_load_balance_interval))
2464     return glb_running_threads;
2465 
2466   glb_call_time = call_time;
2467 
2468   if (max <= 0) {
2469     max = INT_MAX;
2470   }
2471 
2472   // Check how many perfstat_cpu_t structures are available.
2473   int logical_cpus = perfstat_cpu(NULL, NULL, sizeof(perfstat_cpu_t), 0);
2474   if (logical_cpus <= 0) {
2475     glb_call_time = -1;
2476     return -1;
2477   }
2478 
2479   perfstat_cpu_t *cpu_stat = (perfstat_cpu_t *)KMP_INTERNAL_MALLOC(
2480       logical_cpus * sizeof(perfstat_cpu_t));
2481   if (cpu_stat == NULL) {
2482     glb_call_time = -1;
2483     return -1;
2484   }
2485 
2486   // Set first CPU as the name of the first logical CPU for which the info is
2487   // desired.
2488   perfstat_id_t first_cpu_name;
2489   strcpy(first_cpu_name.name, FIRST_CPU);
2490 
2491   // Get the stat info of logical CPUs.
2492   int rc = perfstat_cpu(&first_cpu_name, cpu_stat, sizeof(perfstat_cpu_t),
2493                         logical_cpus);
2494   KMP_DEBUG_ASSERT(rc == logical_cpus);
2495   if (rc <= 0) {
2496     KMP_INTERNAL_FREE(cpu_stat);
2497     glb_call_time = -1;
2498     return -1;
2499   }
2500   for (int i = 0; i < logical_cpus; ++i) {
2501     running_threads += cpu_stat[i].runque;
2502     if (running_threads >= max)
2503       break;
2504   }
2505 
2506   // There _might_ be a timing hole where the thread executing this
2507   // code gets skipped in the load balance, and running_threads is 0.
2508   // Assert in the debug builds only!!!
2509   KMP_DEBUG_ASSERT(running_threads > 0);
2510   if (running_threads <= 0)
2511     running_threads = 1;
2512 
2513   KMP_INTERNAL_FREE(cpu_stat);
2514 
2515   glb_running_threads = running_threads;
2516 
2517   return running_threads;
2518 }
2519 
2520 #else // Linux* OS
2521 
2522 // The function returns number of running (not sleeping) threads, or -1 in case
2523 // of error. An error may be reported if the Linux* OS kernel is too old
2524 // (without "/proc" support). Counting running threads stops once the max
2525 // number of running threads is encountered.
2526 int __kmp_get_load_balance(int max) {
2527   static int permanent_error = 0;
2528   static int glb_running_threads = 0; // Saved count of the running threads for
2529   // the thread balance algorithm
2530   static double glb_call_time = 0; /* Thread balance algorithm call time */
2531 
2532   int running_threads = 0; // Number of running threads in the system.
2533 
2534   DIR *proc_dir = NULL; // Handle of "/proc/" directory.
2535   struct dirent *proc_entry = NULL;
2536 
2537   kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
2538   DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
2539   struct dirent *task_entry = NULL;
2540   int task_path_fixed_len;
2541 
2542   kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
2543   int stat_file = -1;
2544   int stat_path_fixed_len;
2545 
2546 #ifdef KMP_DEBUG
2547   int total_processes = 0; // Total number of processes in system.
2548 #endif
2549 
2550   double call_time = 0.0;
2551 
2552   __kmp_str_buf_init(&task_path);
2553   __kmp_str_buf_init(&stat_path);
2554 
2555   __kmp_elapsed(&call_time);
2556 
2557   if (glb_call_time &&
2558       (call_time - glb_call_time < __kmp_load_balance_interval)) {
2559     running_threads = glb_running_threads;
2560     goto finish;
2561   }
2562 
2563   glb_call_time = call_time;
2564 
2565   // Do not spend time on scanning "/proc/" if we have a permanent error.
2566   if (permanent_error) {
2567     running_threads = -1;
2568     goto finish;
2569   }
2570 
2571   if (max <= 0) {
2572     max = INT_MAX;
2573   }
2574 
2575   // Open "/proc/" directory.
2576   proc_dir = opendir("/proc");
2577   if (proc_dir == NULL) {
2578     // Cannot open "/proc/". Probably the kernel does not support it. Return an
2579     // error now and in subsequent calls.
2580     running_threads = -1;
2581     permanent_error = 1;
2582     goto finish;
2583   }
2584 
2585   // Initialize fixed part of task_path. This part will not change.
2586   __kmp_str_buf_cat(&task_path, "/proc/", 6);
2587   task_path_fixed_len = task_path.used; // Remember number of used characters.
2588 
2589   proc_entry = readdir(proc_dir);
2590   while (proc_entry != NULL) {
2591     // Proc entry is a directory and name starts with a digit. Assume it is a
2592     // process' directory.
2593     if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
2594 
2595 #ifdef KMP_DEBUG
2596       ++total_processes;
2597 #endif
2598       // Make sure the init process is the very first entry in "/proc", so we
2599       // can replace strcmp(proc_entry->d_name, "1") == 0 with the simpler
2600       // total_processes == 1. We check that total_processes == 1 => d_name ==
2601       // "1" holds (where "=>" is implication). Since C++ has no => operator,
2602       // we use its equivalent: a => b == !a || b.
2603       KMP_DEBUG_ASSERT(total_processes != 1 ||
2604                        strcmp(proc_entry->d_name, "1") == 0);
2605 
2606       // Construct task_path.
2607       task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
2608       __kmp_str_buf_cat(&task_path, proc_entry->d_name,
2609                         KMP_STRLEN(proc_entry->d_name));
2610       __kmp_str_buf_cat(&task_path, "/task", 5);
2611 
2612       task_dir = opendir(task_path.str);
2613       if (task_dir == NULL) {
2614         // A process can finish between reading its "/proc/" directory entry
2615         // and opening its "task/" directory. So, in the general case we should
2616         // not complain, but skip this process and read the next one. But on
2617         // systems with no "task/" support we would spend a lot of time scanning
2618         // the "/proc/" tree again and again without any benefit. The "init"
2619         // process (pid 1) should always exist, so if we cannot open
2620         // "/proc/1/task/", it means "task/" is not supported by the kernel.
2621         // Report an error now and in the future.
2622         if (strcmp(proc_entry->d_name, "1") == 0) {
2623           running_threads = -1;
2624           permanent_error = 1;
2625           goto finish;
2626         }
2627       } else {
2628         // Construct fixed part of stat file path.
2629         __kmp_str_buf_clear(&stat_path);
2630         __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
2631         __kmp_str_buf_cat(&stat_path, "/", 1);
2632         stat_path_fixed_len = stat_path.used;
2633 
2634         task_entry = readdir(task_dir);
2635         while (task_entry != NULL) {
2636           // It is a directory and name starts with a digit.
2637           if (task_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
2638 
2639             // Construct complete stat file path. Easiest way would be:
2640             //  __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
2641             //  task_entry->d_name );
2642             // but a series of __kmp_str_buf_cat() calls works a bit faster.
2643             stat_path.used =
2644                 stat_path_fixed_len; // Reset stat path to its fixed part.
2645             __kmp_str_buf_cat(&stat_path, task_entry->d_name,
2646                               KMP_STRLEN(task_entry->d_name));
2647             __kmp_str_buf_cat(&stat_path, "/stat", 5);
2648 
2649             // Note: Low-level API (open/read/close) is used. High-level API
2650             // (fopen/fclose) works ~30% slower.
2651             stat_file = open(stat_path.str, O_RDONLY);
2652             if (stat_file == -1) {
2653               // We cannot report an error because task (thread) can terminate
2654               // just before reading this file.
2655             } else {
2656               /* Content of "stat" file looks like:
2657                  24285 (program) S ...
2658 
2659                  It is a single line (if the program name does not include
2660                  funny symbols). The first number is the thread id, then the
2661                  executable file name in parentheses, then the state of the
2662                  thread. We need just the thread state.
2663
2664                  Good news: The length of the program name is 15 characters
2665                  max; longer names are truncated.
2666
2667                  Thus, we need a rather short buffer: 15 chars for the program
2668                  name + 2 parentheses + 3 spaces + ~7 digits of pid = 37.
2669
2670                  Bad news: The program name may contain special symbols like
2671                  space, closing parenthesis, or even newline. This makes parsing
2672                  the "stat" file not 100% reliable. For funny program names,
2673                  parsing may fail (and report an incorrect thread state).
2674
2675                  Parsing the "status" file looks more promising (due to its
2676                  different structure and escaping of special symbols), but
2677                  reading and parsing it is slower.
2678                   -- ln
2679               */
2680               char buffer[65];
2681               ssize_t len;
2682               len = read(stat_file, buffer, sizeof(buffer) - 1);
2683               if (len >= 0) {
2684                 buffer[len] = 0;
2685                 // Using scanf:
2686                 //     sscanf( buffer, "%*d (%*s) %c ", & state );
2687                 // looks very nice, but searching for a closing parenthesis
2688                 // works a bit faster.
2689                 char *close_parent = strstr(buffer, ") ");
2690                 if (close_parent != NULL) {
2691                   char state = *(close_parent + 2);
2692                   if (state == 'R') {
2693                     ++running_threads;
2694                     if (running_threads >= max) {
2695                       goto finish;
2696                     }
2697                   }
2698                 }
2699               }
2700               close(stat_file);
2701               stat_file = -1;
2702             }
2703           }
2704           task_entry = readdir(task_dir);
2705         }
2706         closedir(task_dir);
2707         task_dir = NULL;
2708       }
2709     }
2710     proc_entry = readdir(proc_dir);
2711   }
2712 
2713   // There _might_ be a timing hole where the thread executing this
2714   // code gets skipped in the load balance, and running_threads is 0.
2715   // Assert in the debug builds only!!!
2716   KMP_DEBUG_ASSERT(running_threads > 0);
2717   if (running_threads <= 0) {
2718     running_threads = 1;
2719   }
2720 
2721 finish: // Clean up and exit.
2722   if (proc_dir != NULL) {
2723     closedir(proc_dir);
2724   }
2725   __kmp_str_buf_free(&task_path);
2726   if (task_dir != NULL) {
2727     closedir(task_dir);
2728   }
2729   __kmp_str_buf_free(&stat_path);
2730   if (stat_file != -1) {
2731     close(stat_file);
2732   }
2733 
2734   glb_running_threads = running_threads;
2735 
2736   return running_threads;
2737 
2738 } // __kmp_get_load_balance
2739 
2740 #endif // KMP_OS_DARWIN
2741 
2742 #endif // USE_LOAD_BALANCE
2743 
2744 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
2745       ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) ||                 \
2746       KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||            \
2747       KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF ||   \
2748       KMP_ARCH_AARCH64_32)
2749 
2750 // Because WebAssembly will use `call_indirect` to invoke the microtask and
2751 // WebAssembly indirect calls check that the called signature is a precise
2752 // match, we need to cast each microtask function pointer back from `void *` to
2753 // its original type.
2754 typedef void (*microtask_t0)(int *, int *);
2755 typedef void (*microtask_t1)(int *, int *, void *);
2756 typedef void (*microtask_t2)(int *, int *, void *, void *);
2757 typedef void (*microtask_t3)(int *, int *, void *, void *, void *);
2758 typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *);
2759 typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *,
2760                              void *);
2761 typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *,
2762                              void *, void *);
2763 typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *,
2764                              void *, void *, void *);
2765 typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *,
2766                              void *, void *, void *, void *);
2767 typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *,
2768                              void *, void *, void *, void *, void *);
2769 typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *,
2770                               void *, void *, void *, void *, void *, void *);
2771 typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *,
2772                               void *, void *, void *, void *, void *, void *,
2773                               void *);
2774 typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *,
2775                               void *, void *, void *, void *, void *, void *,
2776                               void *, void *);
2777 typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *,
2778                               void *, void *, void *, void *, void *, void *,
2779                               void *, void *, void *);
2780 typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *,
2781                               void *, void *, void *, void *, void *, void *,
2782                               void *, void *, void *, void *);
2783 typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *,
2784                               void *, void *, void *, void *, void *, void *,
2785                               void *, void *, void *, void *, void *);
2786 
2787 // We really only need the case with 1 argument, because Clang always builds
2788 // a struct of pointers to shared variables referenced in the outlined function.
2789 int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2790                            void *p_argv[]
2791 #if OMPT_SUPPORT
2792                            ,
2793                            void **exit_frame_ptr
2794 #endif
2795 ) {
2796 #if OMPT_SUPPORT
2797   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2798 #endif
2799 
2800   switch (argc) {
2801   default:
2802     fprintf(stderr, "Too many args to microtask: %d!\n", argc);
2803     fflush(stderr);
2804     exit(-1);
2805   case 0:
2806     (*(microtask_t0)pkfn)(&gtid, &tid);
2807     break;
2808   case 1:
2809     (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]);
2810     break;
2811   case 2:
2812     (*(microtask_t2)pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
2813     break;
2814   case 3:
2815     (*(microtask_t3)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
2816     break;
2817   case 4:
2818     (*(microtask_t4)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2819                           p_argv[3]);
2820     break;
2821   case 5:
2822     (*(microtask_t5)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2823                           p_argv[3], p_argv[4]);
2824     break;
2825   case 6:
2826     (*(microtask_t6)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2827                           p_argv[3], p_argv[4], p_argv[5]);
2828     break;
2829   case 7:
2830     (*(microtask_t7)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2831                           p_argv[3], p_argv[4], p_argv[5], p_argv[6]);
2832     break;
2833   case 8:
2834     (*(microtask_t8)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2835                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2836                           p_argv[7]);
2837     break;
2838   case 9:
2839     (*(microtask_t9)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2840                           p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7],
2841                           p_argv[8]);
2842     break;
2843   case 10:
2844     (*(microtask_t10)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2845                            p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2846                            p_argv[7], p_argv[8], p_argv[9]);
2847     break;
2848   case 11:
2849     (*(microtask_t11)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2850                            p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2851                            p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
2852     break;
2853   case 12:
2854     (*(microtask_t12)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2855                            p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2856                            p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2857                            p_argv[11]);
2858     break;
2859   case 13:
2860     (*(microtask_t13)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2861                            p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2862                            p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2863                            p_argv[11], p_argv[12]);
2864     break;
2865   case 14:
2866     (*(microtask_t14)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2867                            p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2868                            p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2869                            p_argv[11], p_argv[12], p_argv[13]);
2870     break;
2871   case 15:
2872     (*(microtask_t15)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
2873                            p_argv[3], p_argv[4], p_argv[5], p_argv[6],
2874                            p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2875                            p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
2876     break;
2877   }
2878 
2879   return 1;
2880 }
2881 
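// For instance, an outlined region whose shared variables are packed into a
// single struct is dispatched through the argc == 1 case above, equivalent to:
//
//   void outlined(int *gtid, int *tid, void *shareds); // the precise signature
//   (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]);     // exact-match cast
//
// On WebAssembly, calling pkfn through a mismatched generic signature would
// trap in call_indirect's signature check, hence one typedef per arity.
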
2882 #endif
2883 
2884 #if KMP_OS_LINUX
2885 // Functions for hidden helper task
2886 namespace {
2887 // Condition variable for initializing hidden helper team
2888 pthread_cond_t hidden_helper_threads_initz_cond_var;
2889 pthread_mutex_t hidden_helper_threads_initz_lock;
2890 volatile int hidden_helper_initz_signaled = FALSE;
2891 
2892 // Condition variable for deinitializing hidden helper team
2893 pthread_cond_t hidden_helper_threads_deinitz_cond_var;
2894 pthread_mutex_t hidden_helper_threads_deinitz_lock;
2895 volatile int hidden_helper_deinitz_signaled = FALSE;
2896 
2897 // Condition variable for the wrapper function of main thread
2898 pthread_cond_t hidden_helper_main_thread_cond_var;
2899 pthread_mutex_t hidden_helper_main_thread_lock;
2900 volatile int hidden_helper_main_thread_signaled = FALSE;
2901 
2902 // Semaphore for worker threads. We don't use a condition variable here
2903 // because, when multiple signals are sent at the same time, only one thread
2904 // might be woken.
2905 sem_t hidden_helper_task_sem;
2906 } // namespace
2907 
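// A minimal sketch of the semaphore's advantage here: each sem_post()
// increments the count and releases exactly one waiter, so N posts wake N
// workers even when the posts happen back-to-back (hypothetical, error
// checks omitted):
//
//   sem_t sem;
//   sem_init(&sem, /*pshared=*/0, /*value=*/0);
//   for (int i = 0; i < n_tasks; ++i)
//     sem_post(&sem); // each post is "banked" in the semaphore count
//   // Each worker calls sem_wait(&sem); N workers proceed, none is lost.
//
// With pthread_cond_signal(), concurrent signals can coalesce and wake only
// one thread.
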
2908 void __kmp_hidden_helper_worker_thread_wait() {
2909   int status = sem_wait(&hidden_helper_task_sem);
2910   KMP_CHECK_SYSFAIL("sem_wait", status);
2911 }
2912 
2913 void __kmp_do_initialize_hidden_helper_threads() {
2914   // Initialize condition variable
2915   int status =
2916       pthread_cond_init(&hidden_helper_threads_initz_cond_var, nullptr);
2917   KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2918 
2919   status = pthread_cond_init(&hidden_helper_threads_deinitz_cond_var, nullptr);
2920   KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2921 
2922   status = pthread_cond_init(&hidden_helper_main_thread_cond_var, nullptr);
2923   KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2924 
2925   status = pthread_mutex_init(&hidden_helper_threads_initz_lock, nullptr);
2926   KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2927 
2928   status = pthread_mutex_init(&hidden_helper_threads_deinitz_lock, nullptr);
2929   KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2930 
2931   status = pthread_mutex_init(&hidden_helper_main_thread_lock, nullptr);
2932   KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2933 
2934   // Initialize the semaphore
2935   status = sem_init(&hidden_helper_task_sem, 0, 0);
2936   KMP_CHECK_SYSFAIL("sem_init", status);
2937 
2938   // Create a new thread to finish initialization
2939   pthread_t handle;
2940   status = pthread_create(
2941       &handle, nullptr,
2942       [](void *) -> void * {
2943         __kmp_hidden_helper_threads_initz_routine();
2944         return nullptr;
2945       },
2946       nullptr);
2947   KMP_CHECK_SYSFAIL("pthread_create", status);
2948 }
2949 
2950 void __kmp_hidden_helper_threads_initz_wait() {
2951   // The initial thread waits here for the completion of the initialization.
2952   // The condition variable is signaled by the main thread of the hidden helper team.
2953   int status = pthread_mutex_lock(&hidden_helper_threads_initz_lock);
2954   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2955 
2956   if (!TCR_4(hidden_helper_initz_signaled)) {
2957     status = pthread_cond_wait(&hidden_helper_threads_initz_cond_var,
2958                                &hidden_helper_threads_initz_lock);
2959     KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2960   }
2961 
2962   status = pthread_mutex_unlock(&hidden_helper_threads_initz_lock);
2963   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2964 }
2965 
2966 void __kmp_hidden_helper_initz_release() {
2967   // After all initialization, reset __kmp_init_hidden_helper_threads to false.
2968   int status = pthread_mutex_lock(&hidden_helper_threads_initz_lock);
2969   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2970 
2971   status = pthread_cond_signal(&hidden_helper_threads_initz_cond_var);
2972   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
2973 
2974   TCW_SYNC_4(hidden_helper_initz_signaled, TRUE);
2975 
2976   status = pthread_mutex_unlock(&hidden_helper_threads_initz_lock);
2977   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2978 }
2979 
2980 void __kmp_hidden_helper_main_thread_wait() {
2981   // The main thread of the hidden helper team will be blocked here. The
2982   // condition variable can only be signaled in the destructor of the RTL.
2983   int status = pthread_mutex_lock(&hidden_helper_main_thread_lock);
2984   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2985 
2986   if (!TCR_4(hidden_helper_main_thread_signaled)) {
2987     status = pthread_cond_wait(&hidden_helper_main_thread_cond_var,
2988                                &hidden_helper_main_thread_lock);
2989     KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2990   }
2991 
2992   status = pthread_mutex_unlock(&hidden_helper_main_thread_lock);
2993   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2994 }
2995 
2996 void __kmp_hidden_helper_main_thread_release() {
2997   // The initial thread of the OpenMP RTL calls this function to wake up the
2998   // main thread of the hidden helper team.
2999   int status = pthread_mutex_lock(&hidden_helper_main_thread_lock);
3000   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
3001 
3002   status = pthread_cond_signal(&hidden_helper_main_thread_cond_var);
3003   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
3004 
3005   // Mark the wakeup as delivered so a waiter that arrives late does not block.
3006   TCW_SYNC_4(hidden_helper_main_thread_signaled, TRUE);
3007 
3008   status = pthread_mutex_unlock(&hidden_helper_main_thread_lock);
3009   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
3010 }
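
     // This wait/release pair runs in the opposite direction from the initz
     // pair above: the hidden helper main thread blocks in
     // __kmp_hidden_helper_main_thread_wait(), and the initial thread releases
     // it from the RTL destructor to shut the helper team down.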
3011 
3012 void __kmp_hidden_helper_worker_thread_signal() {
3013   int status = sem_post(&hidden_helper_task_sem);
3014   KMP_CHECK_SYSFAIL("sem_post", status);
3015 }
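
     // Each sem_post() here releases one hidden helper worker blocked on
     // hidden_helper_task_sem in __kmp_hidden_helper_worker_thread_wait(),
     // defined earlier in this file.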
3016 
3017 void __kmp_hidden_helper_threads_deinitz_wait() {
3018   // The initial thread waits here for deinitialization to complete. The
3019   // condition variable is signaled by the main thread of the hidden helper team.
3020   int status = pthread_mutex_lock(&hidden_helper_threads_deinitz_lock);
3021   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
3022 
3023   while (!TCR_4(hidden_helper_deinitz_signaled)) {
3024     status = pthread_cond_wait(&hidden_helper_threads_deinitz_cond_var,
3025                                &hidden_helper_threads_deinitz_lock);
3026     KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
3027   }
3028 
3029   status = pthread_mutex_unlock(&hidden_helper_threads_deinitz_lock);
3030   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
3031 }
3032 
3033 void __kmp_hidden_helper_threads_deinitz_release() {
3034   int status = pthread_mutex_lock(&hidden_helper_threads_deinitz_lock);
3035   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
3036 
3037   status = pthread_cond_signal(&hidden_helper_threads_deinitz_cond_var);
3038   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
3039 
3040   TCW_SYNC_4(hidden_helper_deinitz_signaled, TRUE);
3041 
3042   status = pthread_mutex_unlock(&hidden_helper_threads_deinitz_lock);
3043   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
3044 }
3045 #else // KMP_OS_LINUX
3046 void __kmp_hidden_helper_worker_thread_wait() {
3047   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3048 }
3049 
3050 void __kmp_do_initialize_hidden_helper_threads() {
3051   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3052 }
3053 
3054 void __kmp_hidden_helper_threads_initz_wait() {
3055   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3056 }
3057 
3058 void __kmp_hidden_helper_initz_release() {
3059   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3060 }
3061 
3062 void __kmp_hidden_helper_main_thread_wait() {
3063   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3064 }
3065 
3066 void __kmp_hidden_helper_main_thread_release() {
3067   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3068 }
3069 
3070 void __kmp_hidden_helper_worker_thread_signal() {
3071   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3072 }
3073 
3074 void __kmp_hidden_helper_threads_deinitz_wait() {
3075   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3076 }
3077 
3078 void __kmp_hidden_helper_threads_deinitz_release() {
3079   KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
3080 }
3081 #endif // KMP_OS_LINUX
3082 
3083 bool __kmp_detect_shm() {
3084   DIR *dir = opendir("/dev/shm");
3085   if (dir) { // /dev/shm exists
3086     closedir(dir);
3087     return true;
3088   }
3089   // opendir() failed: either /dev/shm does not exist (errno == ENOENT) or
3090   // it could not be opened for another reason. In both cases report that
3091   // /dev/shm is unusable.
3092   return false;
3093 }
3094 
3095 bool __kmp_detect_tmp() {
3096   DIR *dir = opendir("/tmp");
3097   if (dir) { // /tmp exists
3098     closedir(dir);
3099     return true;
3100   }
3101   // opendir() failed: either /tmp does not exist (errno == ENOENT) or it
3102   // could not be opened for another reason. In both cases report that /tmp
3103   // is unusable.
3104   return false;
3105 }
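
     // A minimal sketch of how callers might combine the two probes above to
     // pick a directory for runtime-shared files (__kmp_pick_shared_dir is a
     // hypothetical helper, for illustration only):
     //
     //   static const char *__kmp_pick_shared_dir() {
     //     if (__kmp_detect_shm())
     //       return "/dev/shm";
     //     if (__kmp_detect_tmp())
     //       return "/tmp";
     //     return nullptr; // no usable location found
     //   }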
3106 
3107 // end of file //
3108