xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
140b57cec5SDimitry Andric #define KMP_AFFINITY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp.h"
170b57cec5SDimitry Andric #include "kmp_os.h"
180eae32dcSDimitry Andric #include <limits>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
210b57cec5SDimitry Andric #if KMP_USE_HWLOC
220b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
230b57cec5SDimitry Andric public:
240b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
250b57cec5SDimitry Andric     hwloc_cpuset_t mask;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric   public:
280b57cec5SDimitry Andric     Mask() {
290b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
300b57cec5SDimitry Andric       this->zero();
310b57cec5SDimitry Andric     }
320b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
330b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
340b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
350b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
360b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
375f757f3fSDimitry Andric     bool empty() const override { return hwloc_bitmap_iszero(mask); }
380b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
390b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
400b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
410b57cec5SDimitry Andric     }
420b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
430b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
440b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
450b57cec5SDimitry Andric     }
460b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
470b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
480b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
490b57cec5SDimitry Andric     }
500b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
515f757f3fSDimitry Andric     bool is_equal(const KMPAffinity::Mask *rhs) const override {
525f757f3fSDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
535f757f3fSDimitry Andric       return hwloc_bitmap_isequal(mask, convert->mask);
545f757f3fSDimitry Andric     }
550b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
560b57cec5SDimitry Andric     int end() const override { return -1; }
570b57cec5SDimitry Andric     int next(int previous) const override {
580b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
590b57cec5SDimitry Andric     }
600b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
610b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
620b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
63e8d8bef9SDimitry Andric       long retval =
640b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
650b57cec5SDimitry Andric       if (retval >= 0) {
660b57cec5SDimitry Andric         return 0;
670b57cec5SDimitry Andric       }
680b57cec5SDimitry Andric       int error = errno;
690b57cec5SDimitry Andric       if (abort_on_error) {
7006c3fb27SDimitry Andric         __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
7106c3fb27SDimitry Andric                     KMP_ERR(error), __kmp_msg_null);
720b57cec5SDimitry Andric       }
730b57cec5SDimitry Andric       return error;
740b57cec5SDimitry Andric     }
750b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
760b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
77e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
78e8d8bef9SDimitry Andric       long retval =
790b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
800b57cec5SDimitry Andric       if (retval >= 0) {
810b57cec5SDimitry Andric         return 0;
820b57cec5SDimitry Andric       }
830b57cec5SDimitry Andric       int error = errno;
840b57cec5SDimitry Andric       if (abort_on_error) {
8506c3fb27SDimitry Andric         __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
8606c3fb27SDimitry Andric                     KMP_ERR(error), __kmp_msg_null);
870b57cec5SDimitry Andric       }
880b57cec5SDimitry Andric       return error;
890b57cec5SDimitry Andric     }
90e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS
91e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
92e8d8bef9SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
93e8d8bef9SDimitry Andric                   "Illegal set process affinity operation when not capable");
94e8d8bef9SDimitry Andric       int error = 0;
95e8d8bef9SDimitry Andric       const hwloc_topology_support *support =
96e8d8bef9SDimitry Andric           hwloc_topology_get_support(__kmp_hwloc_topology);
97e8d8bef9SDimitry Andric       if (support->cpubind->set_proc_cpubind) {
98e8d8bef9SDimitry Andric         int retval;
99e8d8bef9SDimitry Andric         retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
100e8d8bef9SDimitry Andric                                    HWLOC_CPUBIND_PROCESS);
101e8d8bef9SDimitry Andric         if (retval >= 0)
102e8d8bef9SDimitry Andric           return 0;
103e8d8bef9SDimitry Andric         error = errno;
104e8d8bef9SDimitry Andric         if (abort_on_error)
10506c3fb27SDimitry Andric           __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
10606c3fb27SDimitry Andric                       KMP_ERR(error), __kmp_msg_null);
107e8d8bef9SDimitry Andric       }
108e8d8bef9SDimitry Andric       return error;
109e8d8bef9SDimitry Andric     }
110e8d8bef9SDimitry Andric #endif
1110b57cec5SDimitry Andric     int get_proc_group() const override {
1120b57cec5SDimitry Andric       int group = -1;
1130b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1140b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
1150b57cec5SDimitry Andric         return 1;
1160b57cec5SDimitry Andric       }
1170b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
1180b57cec5SDimitry Andric         // On windows, the long type is always 32 bits
1190b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
1200b57cec5SDimitry Andric         unsigned long second_32_bits =
1210b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
1220b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
1230b57cec5SDimitry Andric           continue;
1240b57cec5SDimitry Andric         }
1250b57cec5SDimitry Andric         if (group >= 0) {
1260b57cec5SDimitry Andric           return -1;
1270b57cec5SDimitry Andric         }
1280b57cec5SDimitry Andric         group = i;
1290b57cec5SDimitry Andric       }
1300b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1310b57cec5SDimitry Andric       return group;
1320b57cec5SDimitry Andric     }
1330b57cec5SDimitry Andric   };
1340b57cec5SDimitry Andric   void determine_capable(const char *var) override {
1350b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
1360b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
1370b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
1380b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
139bdd1243dSDimitry Andric         if (__kmp_affinity.flags.verbose) {
1400b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
1410b57cec5SDimitry Andric         }
142bdd1243dSDimitry Andric       }
1430b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
1440b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
145bdd1243dSDimitry Andric         if (__kmp_affinity.flags.verbose) {
1460b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
1470b57cec5SDimitry Andric         }
1480b57cec5SDimitry Andric       }
149bdd1243dSDimitry Andric     }
1500b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
1510b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
1520b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
1530b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
1540b57cec5SDimitry Andric     // hwloc_* API functions?
1550b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
1560b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
1570b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
1580b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
1590b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
1600b57cec5SDimitry Andric     } else {
1610b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
1620b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
1630b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
1640b57cec5SDimitry Andric     }
1650b57cec5SDimitry Andric   }
1660b57cec5SDimitry Andric   void bind_thread(int which) override {
1670b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
1680b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
1690b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
1700b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
1710b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
1720b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
1730b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
1740b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
1750b57cec5SDimitry Andric   }
1760b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
1770b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
1780b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
1790b57cec5SDimitry Andric     return new Mask[num];
1800b57cec5SDimitry Andric   }
1810b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
1820b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1830b57cec5SDimitry Andric     delete[] hwloc_array;
1840b57cec5SDimitry Andric   }
1850b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
1860b57cec5SDimitry Andric                                       int index) override {
1870b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1880b57cec5SDimitry Andric     return &(hwloc_array[index]);
1890b57cec5SDimitry Andric   }
1900b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
1910b57cec5SDimitry Andric };
1920b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
1930b57cec5SDimitry Andric 
194*0fca6ea1SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
195*0fca6ea1SDimitry Andric     KMP_OS_AIX
1960b57cec5SDimitry Andric #if KMP_OS_LINUX
1970b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present
1980b57cec5SDimitry Andric    in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
1990b57cec5SDimitry Andric    all systems of the same arch where they are defined, and they cannot change:
2000b57cec5SDimitry Andric    they are set in stone forever. */
2010b57cec5SDimitry Andric #include <sys/syscall.h>
2020b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM
2030b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2040b57cec5SDimitry Andric #define __NR_sched_setaffinity 241
2050b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241
2060b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2070b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2080b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2090b57cec5SDimitry Andric #define __NR_sched_getaffinity 242
2100b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242
2110b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2120b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2130b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64
2140b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2150b57cec5SDimitry Andric #define __NR_sched_setaffinity 122
2160b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122
2170b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2180b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2190b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2200b57cec5SDimitry Andric #define __NR_sched_getaffinity 123
2210b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123
2220b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2230b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2240b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
2250b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2260b57cec5SDimitry Andric #define __NR_sched_setaffinity 203
2270b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203
2280b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2290b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2300b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2310b57cec5SDimitry Andric #define __NR_sched_getaffinity 204
2320b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204
2330b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2340b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2350b57cec5SDimitry Andric #elif KMP_ARCH_PPC64
2360b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2370b57cec5SDimitry Andric #define __NR_sched_setaffinity 222
2380b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222
2390b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2400b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2410b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2420b57cec5SDimitry Andric #define __NR_sched_getaffinity 223
2430b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223
2440b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2450b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2460b57cec5SDimitry Andric #elif KMP_ARCH_MIPS
2470b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2480b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239
2490b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239
2500b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2510b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2520b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2530b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240
2540b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240
2550b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2560b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2570b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64
2580b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2590b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195
2600b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195
2610b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2620b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2630b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2640b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196
2650b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196
2660b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2670b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
268bdd1243dSDimitry Andric #elif KMP_ARCH_LOONGARCH64
269bdd1243dSDimitry Andric #ifndef __NR_sched_setaffinity
270bdd1243dSDimitry Andric #define __NR_sched_setaffinity 122
271bdd1243dSDimitry Andric #elif __NR_sched_setaffinity != 122
272bdd1243dSDimitry Andric #error Wrong code for setaffinity system call.
273bdd1243dSDimitry Andric #endif /* __NR_sched_setaffinity */
274bdd1243dSDimitry Andric #ifndef __NR_sched_getaffinity
275bdd1243dSDimitry Andric #define __NR_sched_getaffinity 123
276bdd1243dSDimitry Andric #elif __NR_sched_getaffinity != 123
277bdd1243dSDimitry Andric #error Wrong code for getaffinity system call.
278bdd1243dSDimitry Andric #endif /* __NR_sched_getaffinity */
279bdd1243dSDimitry Andric #elif KMP_ARCH_RISCV64
280bdd1243dSDimitry Andric #ifndef __NR_sched_setaffinity
281bdd1243dSDimitry Andric #define __NR_sched_setaffinity 122
282bdd1243dSDimitry Andric #elif __NR_sched_setaffinity != 122
283bdd1243dSDimitry Andric #error Wrong code for setaffinity system call.
284bdd1243dSDimitry Andric #endif /* __NR_sched_setaffinity */
285bdd1243dSDimitry Andric #ifndef __NR_sched_getaffinity
286bdd1243dSDimitry Andric #define __NR_sched_getaffinity 123
287bdd1243dSDimitry Andric #elif __NR_sched_getaffinity != 123
288bdd1243dSDimitry Andric #error Wrong code for getaffinity system call.
289bdd1243dSDimitry Andric #endif /* __NR_sched_getaffinity */
2905f757f3fSDimitry Andric #elif KMP_ARCH_VE
2915f757f3fSDimitry Andric #ifndef __NR_sched_setaffinity
2925f757f3fSDimitry Andric #define __NR_sched_setaffinity 203
2935f757f3fSDimitry Andric #elif __NR_sched_setaffinity != 203
2945f757f3fSDimitry Andric #error Wrong code for setaffinity system call.
2955f757f3fSDimitry Andric #endif /* __NR_sched_setaffinity */
2965f757f3fSDimitry Andric #ifndef __NR_sched_getaffinity
2975f757f3fSDimitry Andric #define __NR_sched_getaffinity 204
2985f757f3fSDimitry Andric #elif __NR_sched_getaffinity != 204
2995f757f3fSDimitry Andric #error Wrong code for getaffinity system call.
3005f757f3fSDimitry Andric #endif /* __NR_sched_getaffinity */
3015f757f3fSDimitry Andric #elif KMP_ARCH_S390X
3025f757f3fSDimitry Andric #ifndef __NR_sched_setaffinity
3035f757f3fSDimitry Andric #define __NR_sched_setaffinity 239
3045f757f3fSDimitry Andric #elif __NR_sched_setaffinity != 239
3055f757f3fSDimitry Andric #error Wrong code for setaffinity system call.
3065f757f3fSDimitry Andric #endif /* __NR_sched_setaffinity */
3075f757f3fSDimitry Andric #ifndef __NR_sched_getaffinity
3085f757f3fSDimitry Andric #define __NR_sched_getaffinity 240
3095f757f3fSDimitry Andric #elif __NR_sched_getaffinity != 240
3105f757f3fSDimitry Andric #error Wrong code for getaffinity system call.
3115f757f3fSDimitry Andric #endif /* __NR_sched_getaffinity */
312bdd1243dSDimitry Andric #else
3130b57cec5SDimitry Andric #error Unknown or unsupported architecture
3140b57cec5SDimitry Andric #endif /* KMP_ARCH_* */
315*0fca6ea1SDimitry Andric #elif KMP_OS_FREEBSD || KMP_OS_DRAGONFLY
316489b1cf2SDimitry Andric #include <pthread.h>
317489b1cf2SDimitry Andric #include <pthread_np.h>
318*0fca6ea1SDimitry Andric #elif KMP_OS_NETBSD
319*0fca6ea1SDimitry Andric #include <pthread.h>
320*0fca6ea1SDimitry Andric #include <sched.h>
321439352acSDimitry Andric #elif KMP_OS_AIX
322439352acSDimitry Andric #include <sys/dr.h>
323439352acSDimitry Andric #include <sys/rset.h>
324439352acSDimitry Andric #define VMI_MAXRADS 64 // Maximum number of RADs allowed by AIX.
325*0fca6ea1SDimitry Andric #define GET_NUMBER_SMT_SETS 0x0004
326*0fca6ea1SDimitry Andric extern "C" int syssmt(int flags, int, int, int *);
327489b1cf2SDimitry Andric #endif
3280b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
3290b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
330e8d8bef9SDimitry Andric     typedef unsigned long mask_t;
331e8d8bef9SDimitry Andric     typedef decltype(__kmp_affin_mask_size) mask_size_type;
332e8d8bef9SDimitry Andric     static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
333e8d8bef9SDimitry Andric     static const mask_t ONE = 1;
334e8d8bef9SDimitry Andric     mask_size_type get_num_mask_types() const {
335e8d8bef9SDimitry Andric       return __kmp_affin_mask_size / sizeof(mask_t);
336e8d8bef9SDimitry Andric     }
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric   public:
3390b57cec5SDimitry Andric     mask_t *mask;
3400b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
3410b57cec5SDimitry Andric     ~Mask() {
3420b57cec5SDimitry Andric       if (mask)
3430b57cec5SDimitry Andric         __kmp_free(mask);
3440b57cec5SDimitry Andric     }
3450b57cec5SDimitry Andric     void set(int i) override {
346e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
3470b57cec5SDimitry Andric     }
3480b57cec5SDimitry Andric     bool is_set(int i) const override {
349e8d8bef9SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
3500b57cec5SDimitry Andric     }
3510b57cec5SDimitry Andric     void clear(int i) override {
352e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
3530b57cec5SDimitry Andric     }
3540b57cec5SDimitry Andric     void zero() override {
355e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
356e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
357e8d8bef9SDimitry Andric         mask[i] = (mask_t)0;
3580b57cec5SDimitry Andric     }
3595f757f3fSDimitry Andric     bool empty() const override {
3605f757f3fSDimitry Andric       mask_size_type e = get_num_mask_types();
3615f757f3fSDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3625f757f3fSDimitry Andric         if (mask[i] != (mask_t)0)
3635f757f3fSDimitry Andric           return false;
3645f757f3fSDimitry Andric       return true;
3655f757f3fSDimitry Andric     }
3660b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
3670b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
368e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
369e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3700b57cec5SDimitry Andric         mask[i] = convert->mask[i];
3710b57cec5SDimitry Andric     }
3720b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
3730b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
374e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
375e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3760b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
3770b57cec5SDimitry Andric     }
3780b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
3790b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
380e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
381e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3820b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
3830b57cec5SDimitry Andric     }
3840b57cec5SDimitry Andric     void bitwise_not() override {
385e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
386e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3870b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
3880b57cec5SDimitry Andric     }
3895f757f3fSDimitry Andric     bool is_equal(const KMPAffinity::Mask *rhs) const override {
3905f757f3fSDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
3915f757f3fSDimitry Andric       mask_size_type e = get_num_mask_types();
3925f757f3fSDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3935f757f3fSDimitry Andric         if (mask[i] != convert->mask[i])
3945f757f3fSDimitry Andric           return false;
3955f757f3fSDimitry Andric       return true;
3965f757f3fSDimitry Andric     }
3970b57cec5SDimitry Andric     int begin() const override {
3980b57cec5SDimitry Andric       int retval = 0;
3990b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4000b57cec5SDimitry Andric         ++retval;
4010b57cec5SDimitry Andric       return retval;
4020b57cec5SDimitry Andric     }
403e8d8bef9SDimitry Andric     int end() const override {
404e8d8bef9SDimitry Andric       int e;
405e8d8bef9SDimitry Andric       __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
406e8d8bef9SDimitry Andric       return e;
407e8d8bef9SDimitry Andric     }
4080b57cec5SDimitry Andric     int next(int previous) const override {
4090b57cec5SDimitry Andric       int retval = previous + 1;
4100b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4110b57cec5SDimitry Andric         ++retval;
4120b57cec5SDimitry Andric       return retval;
4130b57cec5SDimitry Andric     }
414439352acSDimitry Andric #if KMP_OS_AIX
415439352acSDimitry Andric     // On AIX, we don't have a way to get CPU(s) a thread is bound to.
416439352acSDimitry Andric     // This routine is only used to get the full mask.
417439352acSDimitry Andric     int get_system_affinity(bool abort_on_error) override {
418439352acSDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
419439352acSDimitry Andric                   "Illegal get affinity operation when not capable");
420439352acSDimitry Andric 
421439352acSDimitry Andric       (void)abort_on_error;
422439352acSDimitry Andric 
423439352acSDimitry Andric       // Set the mask with all CPUs that are available.
424439352acSDimitry Andric       for (int i = 0; i < __kmp_xproc; ++i)
425439352acSDimitry Andric         KMP_CPU_SET(i, this);
426439352acSDimitry Andric       return 0;
427439352acSDimitry Andric     }
428439352acSDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
429439352acSDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
430439352acSDimitry Andric 
431439352acSDimitry Andric                   "Illegal set affinity operation when not capable");
432439352acSDimitry Andric 
433439352acSDimitry Andric       int location;
434439352acSDimitry Andric       int gtid = __kmp_entry_gtid();
435439352acSDimitry Andric       int tid = thread_self();
436439352acSDimitry Andric 
437439352acSDimitry Andric       // Unbind the thread if it was bound to any processors before so that
438439352acSDimitry Andric       // we can bind the thread to CPUs specified by the mask not others.
439439352acSDimitry Andric       int retval = bindprocessor(BINDTHREAD, tid, PROCESSOR_CLASS_ANY);
440439352acSDimitry Andric 
441439352acSDimitry Andric       // On AIX, we can only bind to one instead of a set of CPUs with the
442439352acSDimitry Andric       // bindprocessor() system call.
443439352acSDimitry Andric       KMP_CPU_SET_ITERATE(location, this) {
444439352acSDimitry Andric         if (KMP_CPU_ISSET(location, this)) {
445439352acSDimitry Andric           retval = bindprocessor(BINDTHREAD, tid, location);
446439352acSDimitry Andric           if (retval == -1 && errno == 1) {
447439352acSDimitry Andric             rsid_t rsid;
448439352acSDimitry Andric             rsethandle_t rsh;
449439352acSDimitry Andric             // Put something in rsh to prevent compiler warning
450439352acSDimitry Andric             // about uninitalized use
451439352acSDimitry Andric             rsh = rs_alloc(RS_EMPTY);
452439352acSDimitry Andric             rsid.at_pid = getpid();
453439352acSDimitry Andric             if (RS_DEFAULT_RSET != ra_getrset(R_PROCESS, rsid, 0, rsh)) {
454439352acSDimitry Andric               retval = ra_detachrset(R_PROCESS, rsid, 0);
455439352acSDimitry Andric               retval = bindprocessor(BINDTHREAD, tid, location);
456439352acSDimitry Andric             }
457439352acSDimitry Andric           }
458439352acSDimitry Andric           if (retval == 0) {
459439352acSDimitry Andric             KA_TRACE(10, ("__kmp_set_system_affinity:  Done binding "
460439352acSDimitry Andric                           "T#%d to cpu=%d.\n",
461439352acSDimitry Andric                           gtid, location));
462439352acSDimitry Andric             continue;
463439352acSDimitry Andric           }
464439352acSDimitry Andric           int error = errno;
465439352acSDimitry Andric           if (abort_on_error) {
466439352acSDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "bindprocessor()"),
467439352acSDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
468439352acSDimitry Andric             KA_TRACE(10, ("__kmp_set_system_affinity:  Error binding "
469439352acSDimitry Andric                           "T#%d to cpu=%d, errno=%d.\n",
470439352acSDimitry Andric                           gtid, location, error));
471439352acSDimitry Andric             return error;
472439352acSDimitry Andric           }
473439352acSDimitry Andric         }
474439352acSDimitry Andric       }
475439352acSDimitry Andric       return 0;
476439352acSDimitry Andric     }
477439352acSDimitry Andric #else // !KMP_OS_AIX
4780b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
4790b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
4800b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
481489b1cf2SDimitry Andric #if KMP_OS_LINUX
482e8d8bef9SDimitry Andric       long retval =
4830b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
484*0fca6ea1SDimitry Andric #elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
485fe6060f1SDimitry Andric       int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
486fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
4875ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
488489b1cf2SDimitry Andric #endif
4890b57cec5SDimitry Andric       if (retval >= 0) {
4900b57cec5SDimitry Andric         return 0;
4910b57cec5SDimitry Andric       }
4920b57cec5SDimitry Andric       int error = errno;
4930b57cec5SDimitry Andric       if (abort_on_error) {
49406c3fb27SDimitry Andric         __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
49506c3fb27SDimitry Andric                     KMP_ERR(error), __kmp_msg_null);
4960b57cec5SDimitry Andric       }
4970b57cec5SDimitry Andric       return error;
4980b57cec5SDimitry Andric     }
4990b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
5000b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
501e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
502489b1cf2SDimitry Andric #if KMP_OS_LINUX
503e8d8bef9SDimitry Andric       long retval =
5040b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
505*0fca6ea1SDimitry Andric #elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
506fe6060f1SDimitry Andric       int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
507fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
5085ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
509489b1cf2SDimitry Andric #endif
5100b57cec5SDimitry Andric       if (retval >= 0) {
5110b57cec5SDimitry Andric         return 0;
5120b57cec5SDimitry Andric       }
5130b57cec5SDimitry Andric       int error = errno;
5140b57cec5SDimitry Andric       if (abort_on_error) {
51506c3fb27SDimitry Andric         __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
51606c3fb27SDimitry Andric                     KMP_ERR(error), __kmp_msg_null);
5170b57cec5SDimitry Andric       }
5180b57cec5SDimitry Andric       return error;
5190b57cec5SDimitry Andric     }
520439352acSDimitry Andric #endif // KMP_OS_AIX
5210b57cec5SDimitry Andric   };
5220b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
5230b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
5240b57cec5SDimitry Andric   }
5250b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
5260b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
5270b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
5280b57cec5SDimitry Andric     return retval;
5290b57cec5SDimitry Andric   }
5300b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
5310b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
5320b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
5330b57cec5SDimitry Andric     delete native_mask;
5340b57cec5SDimitry Andric   }
5350b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
5360b57cec5SDimitry Andric     return new Mask[num];
5370b57cec5SDimitry Andric   }
5380b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
5390b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
5400b57cec5SDimitry Andric     delete[] linux_array;
5410b57cec5SDimitry Andric   }
5420b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
5430b57cec5SDimitry Andric                                       int index) override {
5440b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
5450b57cec5SDimitry Andric     return &(linux_array[index]);
5460b57cec5SDimitry Andric   }
5470b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
5480b57cec5SDimitry Andric };
549*0fca6ea1SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY  \
550*0fca6ea1SDimitry Andric           || KMP_OS_AIX */
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric #if KMP_OS_WINDOWS
5530b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
5540b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
5550b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
5560b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
5570b57cec5SDimitry Andric     mask_t *mask;
5580b57cec5SDimitry Andric 
5590b57cec5SDimitry Andric   public:
5600b57cec5SDimitry Andric     Mask() {
5610b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
5620b57cec5SDimitry Andric     }
5630b57cec5SDimitry Andric     ~Mask() {
5640b57cec5SDimitry Andric       if (mask)
5650b57cec5SDimitry Andric         __kmp_free(mask);
5660b57cec5SDimitry Andric     }
5670b57cec5SDimitry Andric     void set(int i) override {
5680b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
5690b57cec5SDimitry Andric     }
5700b57cec5SDimitry Andric     bool is_set(int i) const override {
5710b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
5720b57cec5SDimitry Andric     }
5730b57cec5SDimitry Andric     void clear(int i) override {
5740b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
5750b57cec5SDimitry Andric     }
5760b57cec5SDimitry Andric     void zero() override {
5770b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
5780b57cec5SDimitry Andric         mask[i] = 0;
5790b57cec5SDimitry Andric     }
5805f757f3fSDimitry Andric     bool empty() const override {
5815f757f3fSDimitry Andric       for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
5825f757f3fSDimitry Andric         if (mask[i])
5835f757f3fSDimitry Andric           return false;
5845f757f3fSDimitry Andric       return true;
5855f757f3fSDimitry Andric     }
5860b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
5870b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
5880b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
5890b57cec5SDimitry Andric         mask[i] = convert->mask[i];
5900b57cec5SDimitry Andric     }
5910b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
5920b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
5930b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
5940b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
5950b57cec5SDimitry Andric     }
5960b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
5970b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
5980b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
5990b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
6000b57cec5SDimitry Andric     }
6010b57cec5SDimitry Andric     void bitwise_not() override {
6020b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
6030b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
6040b57cec5SDimitry Andric     }
6055f757f3fSDimitry Andric     bool is_equal(const KMPAffinity::Mask *rhs) const override {
6065f757f3fSDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
6075f757f3fSDimitry Andric       for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
6085f757f3fSDimitry Andric         if (mask[i] != convert->mask[i])
6095f757f3fSDimitry Andric           return false;
6105f757f3fSDimitry Andric       return true;
6115f757f3fSDimitry Andric     }
6120b57cec5SDimitry Andric     int begin() const override {
6130b57cec5SDimitry Andric       int retval = 0;
6140b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
6150b57cec5SDimitry Andric         ++retval;
6160b57cec5SDimitry Andric       return retval;
6170b57cec5SDimitry Andric     }
6180b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
6190b57cec5SDimitry Andric     int next(int previous) const override {
6200b57cec5SDimitry Andric       int retval = previous + 1;
6210b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
6220b57cec5SDimitry Andric         ++retval;
6230b57cec5SDimitry Andric       return retval;
6240b57cec5SDimitry Andric     }
625e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
626e8d8bef9SDimitry Andric       if (__kmp_num_proc_groups <= 1) {
627e8d8bef9SDimitry Andric         if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
628e8d8bef9SDimitry Andric           DWORD error = GetLastError();
629e8d8bef9SDimitry Andric           if (abort_on_error) {
630e8d8bef9SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
631e8d8bef9SDimitry Andric                         __kmp_msg_null);
632e8d8bef9SDimitry Andric           }
633e8d8bef9SDimitry Andric           return error;
634e8d8bef9SDimitry Andric         }
635e8d8bef9SDimitry Andric       }
636e8d8bef9SDimitry Andric       return 0;
637e8d8bef9SDimitry Andric     }
6380b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
6390b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
6400b57cec5SDimitry Andric         // Check for a valid mask.
6410b57cec5SDimitry Andric         GROUP_AFFINITY ga;
6420b57cec5SDimitry Andric         int group = get_proc_group();
6430b57cec5SDimitry Andric         if (group < 0) {
6440b57cec5SDimitry Andric           if (abort_on_error) {
6450b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
6460b57cec5SDimitry Andric           }
6470b57cec5SDimitry Andric           return -1;
6480b57cec5SDimitry Andric         }
6490b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
6500b57cec5SDimitry Andric         // and make the system call to set affinity.
6510b57cec5SDimitry Andric         ga.Group = group;
6520b57cec5SDimitry Andric         ga.Mask = mask[group];
6530b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
6540b57cec5SDimitry Andric 
6550b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
6560b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
6570b57cec5SDimitry Andric           DWORD error = GetLastError();
6580b57cec5SDimitry Andric           if (abort_on_error) {
6590b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
6600b57cec5SDimitry Andric                         __kmp_msg_null);
6610b57cec5SDimitry Andric           }
6620b57cec5SDimitry Andric           return error;
6630b57cec5SDimitry Andric         }
6640b57cec5SDimitry Andric       } else {
6650b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
6660b57cec5SDimitry Andric           DWORD error = GetLastError();
6670b57cec5SDimitry Andric           if (abort_on_error) {
6680b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
6690b57cec5SDimitry Andric                         __kmp_msg_null);
6700b57cec5SDimitry Andric           }
6710b57cec5SDimitry Andric           return error;
6720b57cec5SDimitry Andric         }
6730b57cec5SDimitry Andric       }
6740b57cec5SDimitry Andric       return 0;
6750b57cec5SDimitry Andric     }
6760b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
6770b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
6780b57cec5SDimitry Andric         this->zero();
6790b57cec5SDimitry Andric         GROUP_AFFINITY ga;
6800b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
6810b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
6820b57cec5SDimitry Andric           DWORD error = GetLastError();
6830b57cec5SDimitry Andric           if (abort_on_error) {
6840b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
6850b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
6860b57cec5SDimitry Andric           }
6870b57cec5SDimitry Andric           return error;
6880b57cec5SDimitry Andric         }
6890b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
6900b57cec5SDimitry Andric             (ga.Mask == 0)) {
6910b57cec5SDimitry Andric           return -1;
6920b57cec5SDimitry Andric         }
6930b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
6940b57cec5SDimitry Andric       } else {
6950b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
6960b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
6970b57cec5SDimitry Andric           DWORD error = GetLastError();
6980b57cec5SDimitry Andric           if (abort_on_error) {
6990b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
7000b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
7010b57cec5SDimitry Andric           }
7020b57cec5SDimitry Andric           return error;
7030b57cec5SDimitry Andric         }
7040b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
7050b57cec5SDimitry Andric         if (!retval) {
7060b57cec5SDimitry Andric           DWORD error = GetLastError();
7070b57cec5SDimitry Andric           if (abort_on_error) {
7080b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
7090b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
7100b57cec5SDimitry Andric           }
7110b57cec5SDimitry Andric           return error;
7120b57cec5SDimitry Andric         }
7130b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
7140b57cec5SDimitry Andric         if (!newMask) {
7150b57cec5SDimitry Andric           DWORD error = GetLastError();
7160b57cec5SDimitry Andric           if (abort_on_error) {
7170b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
7180b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
7190b57cec5SDimitry Andric           }
7200b57cec5SDimitry Andric         }
7210b57cec5SDimitry Andric         *mask = retval;
7220b57cec5SDimitry Andric       }
7230b57cec5SDimitry Andric       return 0;
7240b57cec5SDimitry Andric     }
7250b57cec5SDimitry Andric     int get_proc_group() const override {
7260b57cec5SDimitry Andric       int group = -1;
7270b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
7280b57cec5SDimitry Andric         return 1;
7290b57cec5SDimitry Andric       }
7300b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
7310b57cec5SDimitry Andric         if (mask[i] == 0)
7320b57cec5SDimitry Andric           continue;
7330b57cec5SDimitry Andric         if (group >= 0)
7340b57cec5SDimitry Andric           return -1;
7350b57cec5SDimitry Andric         group = i;
7360b57cec5SDimitry Andric       }
7370b57cec5SDimitry Andric       return group;
7380b57cec5SDimitry Andric     }
7390b57cec5SDimitry Andric   };
7400b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
7410b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
7420b57cec5SDimitry Andric   }
7430b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
7440b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
7450b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
7460b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
7470b57cec5SDimitry Andric     return new Mask[num];
7480b57cec5SDimitry Andric   }
7490b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
7500b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
7510b57cec5SDimitry Andric     delete[] windows_array;
7520b57cec5SDimitry Andric   }
7530b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
7540b57cec5SDimitry Andric                                       int index) override {
7550b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
7560b57cec5SDimitry Andric     return &(windows_array[index]);
7570b57cec5SDimitry Andric   }
7580b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
7590b57cec5SDimitry Andric };
7600b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
7610b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
7620b57cec5SDimitry Andric 
7630eae32dcSDimitry Andric // Describe an attribute for a level in the machine topology
7640eae32dcSDimitry Andric struct kmp_hw_attr_t {
7650eae32dcSDimitry Andric   int core_type : 8;
7660eae32dcSDimitry Andric   int core_eff : 8;
7670eae32dcSDimitry Andric   unsigned valid : 1;
7680eae32dcSDimitry Andric   unsigned reserved : 15;
7690eae32dcSDimitry Andric 
7700eae32dcSDimitry Andric   static const int UNKNOWN_CORE_EFF = -1;
7710eae32dcSDimitry Andric 
7720eae32dcSDimitry Andric   kmp_hw_attr_t()
7730eae32dcSDimitry Andric       : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
7740eae32dcSDimitry Andric         valid(0), reserved(0) {}
7750eae32dcSDimitry Andric   void set_core_type(kmp_hw_core_type_t type) {
7760eae32dcSDimitry Andric     valid = 1;
7770eae32dcSDimitry Andric     core_type = type;
7780eae32dcSDimitry Andric   }
7790eae32dcSDimitry Andric   void set_core_eff(int eff) {
7800eae32dcSDimitry Andric     valid = 1;
7810eae32dcSDimitry Andric     core_eff = eff;
7820eae32dcSDimitry Andric   }
7830eae32dcSDimitry Andric   kmp_hw_core_type_t get_core_type() const {
7840eae32dcSDimitry Andric     return (kmp_hw_core_type_t)core_type;
7850eae32dcSDimitry Andric   }
7860eae32dcSDimitry Andric   int get_core_eff() const { return core_eff; }
7870eae32dcSDimitry Andric   bool is_core_type_valid() const {
7880eae32dcSDimitry Andric     return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
7890eae32dcSDimitry Andric   }
7900eae32dcSDimitry Andric   bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
7910eae32dcSDimitry Andric   operator bool() const { return valid; }
7920eae32dcSDimitry Andric   void clear() {
7930eae32dcSDimitry Andric     core_type = KMP_HW_CORE_TYPE_UNKNOWN;
7940eae32dcSDimitry Andric     core_eff = UNKNOWN_CORE_EFF;
7950eae32dcSDimitry Andric     valid = 0;
7960eae32dcSDimitry Andric   }
7970eae32dcSDimitry Andric   bool contains(const kmp_hw_attr_t &other) const {
7980eae32dcSDimitry Andric     if (!valid && !other.valid)
7990eae32dcSDimitry Andric       return true;
8000eae32dcSDimitry Andric     if (valid && other.valid) {
8010eae32dcSDimitry Andric       if (other.is_core_type_valid()) {
8020eae32dcSDimitry Andric         if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
8030eae32dcSDimitry Andric           return false;
8040eae32dcSDimitry Andric       }
8050eae32dcSDimitry Andric       if (other.is_core_eff_valid()) {
8060eae32dcSDimitry Andric         if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
8070eae32dcSDimitry Andric           return false;
8080eae32dcSDimitry Andric       }
8090eae32dcSDimitry Andric       return true;
8100eae32dcSDimitry Andric     }
8110eae32dcSDimitry Andric     return false;
8120eae32dcSDimitry Andric   }
8135f757f3fSDimitry Andric #if KMP_AFFINITY_SUPPORTED
8145f757f3fSDimitry Andric   bool contains(const kmp_affinity_attrs_t &attr) const {
8155f757f3fSDimitry Andric     if (!valid && !attr.valid)
8165f757f3fSDimitry Andric       return true;
8175f757f3fSDimitry Andric     if (valid && attr.valid) {
8185f757f3fSDimitry Andric       if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
8195f757f3fSDimitry Andric         return (is_core_type_valid() &&
8205f757f3fSDimitry Andric                 (get_core_type() == (kmp_hw_core_type_t)attr.core_type));
8215f757f3fSDimitry Andric       if (attr.core_eff != UNKNOWN_CORE_EFF)
8225f757f3fSDimitry Andric         return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
8235f757f3fSDimitry Andric       return true;
8245f757f3fSDimitry Andric     }
8255f757f3fSDimitry Andric     return false;
8265f757f3fSDimitry Andric   }
8275f757f3fSDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
8280eae32dcSDimitry Andric   bool operator==(const kmp_hw_attr_t &rhs) const {
8290eae32dcSDimitry Andric     return (rhs.valid == valid && rhs.core_eff == core_eff &&
8300eae32dcSDimitry Andric             rhs.core_type == core_type);
8310eae32dcSDimitry Andric   }
8320eae32dcSDimitry Andric   bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
8330eae32dcSDimitry Andric };
834349cc55cSDimitry Andric 
#if KMP_AFFINITY_SUPPORTED
// kmp_hw_attr_t and kmp_affinity_attrs_t are required to have the same size.
KMP_BUILD_ASSERT(sizeof(kmp_affinity_attrs_t) == sizeof(kmp_hw_attr_t));
#endif
838bdd1243dSDimitry Andric 
839fe6060f1SDimitry Andric class kmp_hw_thread_t {
8400b57cec5SDimitry Andric public:
841fe6060f1SDimitry Andric   static const int UNKNOWN_ID = -1;
842bdd1243dSDimitry Andric   static const int MULTIPLE_ID = -2;
843fe6060f1SDimitry Andric   static int compare_ids(const void *a, const void *b);
844fe6060f1SDimitry Andric   static int compare_compact(const void *a, const void *b);
845fe6060f1SDimitry Andric   int ids[KMP_HW_LAST];
846fe6060f1SDimitry Andric   int sub_ids[KMP_HW_LAST];
847fe6060f1SDimitry Andric   bool leader;
848fe6060f1SDimitry Andric   int os_id;
8490eae32dcSDimitry Andric   kmp_hw_attr_t attrs;
850349cc55cSDimitry Andric 
851fe6060f1SDimitry Andric   void print() const;
852fe6060f1SDimitry Andric   void clear() {
853fe6060f1SDimitry Andric     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
854fe6060f1SDimitry Andric       ids[i] = UNKNOWN_ID;
855fe6060f1SDimitry Andric     leader = false;
8560eae32dcSDimitry Andric     attrs.clear();
8570b57cec5SDimitry Andric   }
8580b57cec5SDimitry Andric };
8590b57cec5SDimitry Andric 
860fe6060f1SDimitry Andric class kmp_topology_t {
861fe6060f1SDimitry Andric 
862fe6060f1SDimitry Andric   struct flags_t {
863fe6060f1SDimitry Andric     int uniform : 1;
864fe6060f1SDimitry Andric     int reserved : 31;
8650b57cec5SDimitry Andric   };
8660b57cec5SDimitry Andric 
867fe6060f1SDimitry Andric   int depth;
868fe6060f1SDimitry Andric 
869349cc55cSDimitry Andric   // The following arrays are all 'depth' long and have been
870349cc55cSDimitry Andric   // allocated to hold up to KMP_HW_LAST number of objects if
871349cc55cSDimitry Andric   // needed so layers can be added without reallocation of any array
872fe6060f1SDimitry Andric 
873fe6060f1SDimitry Andric   // Orderd array of the types in the topology
874fe6060f1SDimitry Andric   kmp_hw_t *types;
875fe6060f1SDimitry Andric 
876fe6060f1SDimitry Andric   // Keep quick topology ratios, for non-uniform topologies,
877fe6060f1SDimitry Andric   // this ratio holds the max number of itemAs per itemB
878fe6060f1SDimitry Andric   // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
879fe6060f1SDimitry Andric   int *ratio;
880fe6060f1SDimitry Andric 
881fe6060f1SDimitry Andric   // Storage containing the absolute number of each topology layer
882fe6060f1SDimitry Andric   int *count;
883fe6060f1SDimitry Andric 
8840eae32dcSDimitry Andric   // The number of core efficiencies. This is only useful for hybrid
8850eae32dcSDimitry Andric   // topologies. Core efficiencies will range from 0 to num efficiencies - 1
8860eae32dcSDimitry Andric   int num_core_efficiencies;
8870eae32dcSDimitry Andric   int num_core_types;
888349cc55cSDimitry Andric   kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
889349cc55cSDimitry Andric 
890fe6060f1SDimitry Andric   // The hardware threads array
891fe6060f1SDimitry Andric   // hw_threads is num_hw_threads long
892fe6060f1SDimitry Andric   // Each hw_thread's ids and sub_ids are depth deep
893fe6060f1SDimitry Andric   int num_hw_threads;
894fe6060f1SDimitry Andric   kmp_hw_thread_t *hw_threads;
895fe6060f1SDimitry Andric 
896fe6060f1SDimitry Andric   // Equivalence hash where the key is the hardware topology item
897fe6060f1SDimitry Andric   // and the value is the equivalent hardware topology type in the
898fe6060f1SDimitry Andric   // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
899fe6060f1SDimitry Andric   // known equivalence for the topology type
900fe6060f1SDimitry Andric   kmp_hw_t equivalent[KMP_HW_LAST];
901fe6060f1SDimitry Andric 
902fe6060f1SDimitry Andric   // Flags describing the topology
903fe6060f1SDimitry Andric   flags_t flags;
904fe6060f1SDimitry Andric 
905bdd1243dSDimitry Andric   // Compact value used during sort_compact()
906bdd1243dSDimitry Andric   int compact;
907bdd1243dSDimitry Andric 
908349cc55cSDimitry Andric   // Insert a new topology layer after allocation
909349cc55cSDimitry Andric   void _insert_layer(kmp_hw_t type, const int *ids);
910349cc55cSDimitry Andric 
911349cc55cSDimitry Andric #if KMP_GROUP_AFFINITY
912349cc55cSDimitry Andric   // Insert topology information about Windows Processor groups
913349cc55cSDimitry Andric   void _insert_windows_proc_groups();
914349cc55cSDimitry Andric #endif
915349cc55cSDimitry Andric 
916fe6060f1SDimitry Andric   // Count each item & get the num x's per y
917fe6060f1SDimitry Andric   // e.g., get the number of cores and the number of threads per core
918fe6060f1SDimitry Andric   // for each (x, y) in (KMP_HW_* , KMP_HW_*)
919fe6060f1SDimitry Andric   void _gather_enumeration_information();
920fe6060f1SDimitry Andric 
921fe6060f1SDimitry Andric   // Remove layers that don't add information to the topology.
922fe6060f1SDimitry Andric   // This is done by having the layer take on the id = UNKNOWN_ID (-1)
923fe6060f1SDimitry Andric   void _remove_radix1_layers();
924fe6060f1SDimitry Andric 
925fe6060f1SDimitry Andric   // Find out if the topology is uniform
926fe6060f1SDimitry Andric   void _discover_uniformity();
927fe6060f1SDimitry Andric 
928fe6060f1SDimitry Andric   // Set all the sub_ids for each hardware thread
929fe6060f1SDimitry Andric   void _set_sub_ids();
930fe6060f1SDimitry Andric 
931fe6060f1SDimitry Andric   // Set global affinity variables describing the number of threads per
932fe6060f1SDimitry Andric   // core, the number of packages, the number of cores per package, and
933fe6060f1SDimitry Andric   // the number of cores.
934fe6060f1SDimitry Andric   void _set_globals();
935fe6060f1SDimitry Andric 
936fe6060f1SDimitry Andric   // Set the last level cache equivalent type
937fe6060f1SDimitry Andric   void _set_last_level_cache();
938fe6060f1SDimitry Andric 
9390eae32dcSDimitry Andric   // Return the number of cores with a particular attribute, 'attr'.
9400eae32dcSDimitry Andric   // If 'find_all' is true, then find all cores on the machine, otherwise find
9410eae32dcSDimitry Andric   // all cores per the layer 'above'
9420eae32dcSDimitry Andric   int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
9430eae32dcSDimitry Andric                             bool find_all = false) const;
944349cc55cSDimitry Andric 
945fe6060f1SDimitry Andric public:
946fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
947fe6060f1SDimitry Andric   kmp_topology_t() = delete;
948fe6060f1SDimitry Andric   kmp_topology_t(const kmp_topology_t &t) = delete;
949fe6060f1SDimitry Andric   kmp_topology_t(kmp_topology_t &&t) = delete;
950fe6060f1SDimitry Andric   kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
951fe6060f1SDimitry Andric   kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
952fe6060f1SDimitry Andric 
953fe6060f1SDimitry Andric   static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
954fe6060f1SDimitry Andric   static void deallocate(kmp_topology_t *);
955fe6060f1SDimitry Andric 
956fe6060f1SDimitry Andric   // Functions used in create_map() routines
957fe6060f1SDimitry Andric   kmp_hw_thread_t &at(int index) {
958fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
959fe6060f1SDimitry Andric     return hw_threads[index];
960fe6060f1SDimitry Andric   }
961fe6060f1SDimitry Andric   const kmp_hw_thread_t &at(int index) const {
962fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
963fe6060f1SDimitry Andric     return hw_threads[index];
964fe6060f1SDimitry Andric   }
965fe6060f1SDimitry Andric   int get_num_hw_threads() const { return num_hw_threads; }
966fe6060f1SDimitry Andric   void sort_ids() {
967fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
968fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_ids);
969fe6060f1SDimitry Andric   }
970fe6060f1SDimitry Andric   // Check if the hardware ids are unique, if they are
971fe6060f1SDimitry Andric   // return true, otherwise return false
972fe6060f1SDimitry Andric   bool check_ids() const;
973fe6060f1SDimitry Andric 
974fe6060f1SDimitry Andric   // Function to call after the create_map() routine
975fe6060f1SDimitry Andric   void canonicalize();
976fe6060f1SDimitry Andric   void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
977fe6060f1SDimitry Andric 
978fe6060f1SDimitry Andric // Functions used after canonicalize() called
979bdd1243dSDimitry Andric 
980bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED
981bdd1243dSDimitry Andric   // Set the granularity for affinity settings
982bdd1243dSDimitry Andric   void set_granularity(kmp_affinity_t &stgs) const;
9835f757f3fSDimitry Andric   bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
9845f757f3fSDimitry Andric   bool restrict_to_mask(const kmp_affin_mask_t *mask);
985fe6060f1SDimitry Andric   bool filter_hw_subset();
9865f757f3fSDimitry Andric #endif
987fe6060f1SDimitry Andric   bool is_uniform() const { return flags.uniform; }
988fe6060f1SDimitry Andric   // Tell whether a type is a valid type in the topology
989fe6060f1SDimitry Andric   // returns KMP_HW_UNKNOWN when there is no equivalent type
9905f757f3fSDimitry Andric   kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
9915f757f3fSDimitry Andric     if (type == KMP_HW_UNKNOWN)
9925f757f3fSDimitry Andric       return KMP_HW_UNKNOWN;
9935f757f3fSDimitry Andric     return equivalent[type];
9945f757f3fSDimitry Andric   }
995fe6060f1SDimitry Andric   // Set type1 = type2
996fe6060f1SDimitry Andric   void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
997fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
998fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
999fe6060f1SDimitry Andric     kmp_hw_t real_type2 = equivalent[type2];
1000fe6060f1SDimitry Andric     if (real_type2 == KMP_HW_UNKNOWN)
1001fe6060f1SDimitry Andric       real_type2 = type2;
1002fe6060f1SDimitry Andric     equivalent[type1] = real_type2;
1003fe6060f1SDimitry Andric     // This loop is required since any of the types may have been set to
1004fe6060f1SDimitry Andric     // be equivalent to type1.  They all must be checked and reset to type2.
1005fe6060f1SDimitry Andric     KMP_FOREACH_HW_TYPE(type) {
1006fe6060f1SDimitry Andric       if (equivalent[type] == type1) {
1007fe6060f1SDimitry Andric         equivalent[type] = real_type2;
1008fe6060f1SDimitry Andric       }
1009fe6060f1SDimitry Andric     }
1010fe6060f1SDimitry Andric   }
1011fe6060f1SDimitry Andric   // Calculate number of types corresponding to level1
1012fe6060f1SDimitry Andric   // per types corresponding to level2 (e.g., number of threads per core)
1013fe6060f1SDimitry Andric   int calculate_ratio(int level1, int level2) const {
1014fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
1015fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
1016fe6060f1SDimitry Andric     int r = 1;
1017fe6060f1SDimitry Andric     for (int level = level1; level > level2; --level)
1018fe6060f1SDimitry Andric       r *= ratio[level];
1019fe6060f1SDimitry Andric     return r;
1020fe6060f1SDimitry Andric   }
1021fe6060f1SDimitry Andric   int get_ratio(int level) const {
1022fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
1023fe6060f1SDimitry Andric     return ratio[level];
1024fe6060f1SDimitry Andric   }
1025fe6060f1SDimitry Andric   int get_depth() const { return depth; };
1026fe6060f1SDimitry Andric   kmp_hw_t get_type(int level) const {
1027fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
1028fe6060f1SDimitry Andric     return types[level];
1029fe6060f1SDimitry Andric   }
1030fe6060f1SDimitry Andric   int get_level(kmp_hw_t type) const {
1031fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
1032fe6060f1SDimitry Andric     int eq_type = equivalent[type];
1033fe6060f1SDimitry Andric     if (eq_type == KMP_HW_UNKNOWN)
10340b57cec5SDimitry Andric       return -1;
1035fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i)
1036fe6060f1SDimitry Andric       if (types[i] == eq_type)
1037fe6060f1SDimitry Andric         return i;
1038fe6060f1SDimitry Andric     return -1;
10390b57cec5SDimitry Andric   }
1040fe6060f1SDimitry Andric   int get_count(int level) const {
1041fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
1042fe6060f1SDimitry Andric     return count[level];
10430b57cec5SDimitry Andric   }
10440eae32dcSDimitry Andric   // Return the total number of cores with attribute 'attr'
10450eae32dcSDimitry Andric   int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
10460eae32dcSDimitry Andric     return _get_ncores_with_attr(attr, -1, true);
10470eae32dcSDimitry Andric   }
10480eae32dcSDimitry Andric   // Return the number of cores with attribute
10490eae32dcSDimitry Andric   // 'attr' per topology level 'above'
10500eae32dcSDimitry Andric   int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
10510eae32dcSDimitry Andric     return _get_ncores_with_attr(attr, above, false);
10520eae32dcSDimitry Andric   }
10530eae32dcSDimitry Andric 
#if KMP_AFFINITY_SUPPORTED
// The qsort() comparator used by sort_compact() is a friend of this class.
friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
// Copy affinity.compact into the 'compact' member, then sort the
// hw_threads array in place with kmp_hw_thread_t::compare_compact.
// NOTE(review): 'compact' is stored before sorting, presumably because the
// comparator reads it -- confirm against compare_compact().
void sort_compact(kmp_affinity_t &affinity) {
  compact = affinity.compact;
  const size_t elem_size = sizeof(kmp_hw_thread_t);
  qsort(hw_threads, num_hw_threads, elem_size,
        kmp_hw_thread_t::compare_compact);
}
#endif
1062fe6060f1SDimitry Andric   void print(const char *env_var = "KMP_AFFINITY") const;
1063fe6060f1SDimitry Andric   void dump() const;
1064fe6060f1SDimitry Andric };
1065349cc55cSDimitry Andric extern kmp_topology_t *__kmp_topology;
1066fe6060f1SDimitry Andric 
1067fe6060f1SDimitry Andric class kmp_hw_subset_t {
10680eae32dcSDimitry Andric   const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
10690eae32dcSDimitry Andric 
1070fe6060f1SDimitry Andric public:
10710eae32dcSDimitry Andric   // Describe a machine topology item in KMP_HW_SUBSET
1072fe6060f1SDimitry Andric   struct item_t {
1073fe6060f1SDimitry Andric     kmp_hw_t type;
10740eae32dcSDimitry Andric     int num_attrs;
10750eae32dcSDimitry Andric     int num[MAX_ATTRS];
10760eae32dcSDimitry Andric     int offset[MAX_ATTRS];
10770eae32dcSDimitry Andric     kmp_hw_attr_t attr[MAX_ATTRS];
1078fe6060f1SDimitry Andric   };
10790eae32dcSDimitry Andric   // Put parenthesis around max to avoid accidental use of Windows max macro.
10800eae32dcSDimitry Andric   const static int USE_ALL = (std::numeric_limits<int>::max)();
1081fe6060f1SDimitry Andric 
1082fe6060f1SDimitry Andric private:
1083fe6060f1SDimitry Andric   int depth;
1084fe6060f1SDimitry Andric   int capacity;
1085fe6060f1SDimitry Andric   item_t *items;
1086fe6060f1SDimitry Andric   kmp_uint64 set;
1087fe6060f1SDimitry Andric   bool absolute;
1088fe6060f1SDimitry Andric   // The set must be able to handle up to KMP_HW_LAST number of layers
1089fe6060f1SDimitry Andric   KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
1090349cc55cSDimitry Andric   // Sorting the KMP_HW_SUBSET items to follow topology order
1091349cc55cSDimitry Andric   // All unknown topology types will be at the beginning of the subset
1092349cc55cSDimitry Andric   static int hw_subset_compare(const void *i1, const void *i2) {
1093349cc55cSDimitry Andric     kmp_hw_t type1 = ((const item_t *)i1)->type;
1094349cc55cSDimitry Andric     kmp_hw_t type2 = ((const item_t *)i2)->type;
1095349cc55cSDimitry Andric     int level1 = __kmp_topology->get_level(type1);
1096349cc55cSDimitry Andric     int level2 = __kmp_topology->get_level(type2);
1097349cc55cSDimitry Andric     return level1 - level2;
1098349cc55cSDimitry Andric   }
1099fe6060f1SDimitry Andric 
1100fe6060f1SDimitry Andric public:
1101fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
1102fe6060f1SDimitry Andric   kmp_hw_subset_t() = delete;
1103fe6060f1SDimitry Andric   kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
1104fe6060f1SDimitry Andric   kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
1105fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
1106fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
1107fe6060f1SDimitry Andric 
1108fe6060f1SDimitry Andric   static kmp_hw_subset_t *allocate() {
1109fe6060f1SDimitry Andric     int initial_capacity = 5;
1110fe6060f1SDimitry Andric     kmp_hw_subset_t *retval =
1111fe6060f1SDimitry Andric         (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
1112fe6060f1SDimitry Andric     retval->depth = 0;
1113fe6060f1SDimitry Andric     retval->capacity = initial_capacity;
1114fe6060f1SDimitry Andric     retval->set = 0ull;
1115fe6060f1SDimitry Andric     retval->absolute = false;
1116fe6060f1SDimitry Andric     retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
1117fe6060f1SDimitry Andric     return retval;
1118fe6060f1SDimitry Andric   }
1119fe6060f1SDimitry Andric   static void deallocate(kmp_hw_subset_t *subset) {
1120fe6060f1SDimitry Andric     __kmp_free(subset->items);
1121fe6060f1SDimitry Andric     __kmp_free(subset);
1122fe6060f1SDimitry Andric   }
1123fe6060f1SDimitry Andric   void set_absolute() { absolute = true; }
1124fe6060f1SDimitry Andric   bool is_absolute() const { return absolute; }
11250eae32dcSDimitry Andric   void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
11260eae32dcSDimitry Andric     for (int i = 0; i < depth; ++i) {
11270eae32dcSDimitry Andric       // Found an existing item for this layer type
11280eae32dcSDimitry Andric       // Add the num, offset, and attr to this item
11290eae32dcSDimitry Andric       if (items[i].type == type) {
11300eae32dcSDimitry Andric         int idx = items[i].num_attrs++;
11310eae32dcSDimitry Andric         if ((size_t)idx >= MAX_ATTRS)
11320eae32dcSDimitry Andric           return;
11330eae32dcSDimitry Andric         items[i].num[idx] = num;
11340eae32dcSDimitry Andric         items[i].offset[idx] = offset;
11350eae32dcSDimitry Andric         items[i].attr[idx] = attr;
11360eae32dcSDimitry Andric         return;
11370eae32dcSDimitry Andric       }
11380eae32dcSDimitry Andric     }
1139fe6060f1SDimitry Andric     if (depth == capacity - 1) {
1140fe6060f1SDimitry Andric       capacity *= 2;
1141fe6060f1SDimitry Andric       item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
1142fe6060f1SDimitry Andric       for (int i = 0; i < depth; ++i)
1143fe6060f1SDimitry Andric         new_items[i] = items[i];
1144fe6060f1SDimitry Andric       __kmp_free(items);
1145fe6060f1SDimitry Andric       items = new_items;
1146fe6060f1SDimitry Andric     }
11470eae32dcSDimitry Andric     items[depth].num_attrs = 1;
1148fe6060f1SDimitry Andric     items[depth].type = type;
11490eae32dcSDimitry Andric     items[depth].num[0] = num;
11500eae32dcSDimitry Andric     items[depth].offset[0] = offset;
11510eae32dcSDimitry Andric     items[depth].attr[0] = attr;
1152fe6060f1SDimitry Andric     depth++;
1153fe6060f1SDimitry Andric     set |= (1ull << type);
1154fe6060f1SDimitry Andric   }
1155fe6060f1SDimitry Andric   int get_depth() const { return depth; }
1156fe6060f1SDimitry Andric   const item_t &at(int index) const {
1157fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1158fe6060f1SDimitry Andric     return items[index];
1159fe6060f1SDimitry Andric   }
1160fe6060f1SDimitry Andric   item_t &at(int index) {
1161fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1162fe6060f1SDimitry Andric     return items[index];
1163fe6060f1SDimitry Andric   }
1164fe6060f1SDimitry Andric   void remove(int index) {
1165fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1166fe6060f1SDimitry Andric     set &= ~(1ull << items[index].type);
1167fe6060f1SDimitry Andric     for (int j = index + 1; j < depth; ++j) {
1168fe6060f1SDimitry Andric       items[j - 1] = items[j];
1169fe6060f1SDimitry Andric     }
1170fe6060f1SDimitry Andric     depth--;
1171fe6060f1SDimitry Andric   }
1172349cc55cSDimitry Andric   void sort() {
1173349cc55cSDimitry Andric     KMP_DEBUG_ASSERT(__kmp_topology);
1174349cc55cSDimitry Andric     qsort(items, depth, sizeof(item_t), hw_subset_compare);
1175349cc55cSDimitry Andric   }
1176fe6060f1SDimitry Andric   bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
1177*0fca6ea1SDimitry Andric 
1178*0fca6ea1SDimitry Andric   // Canonicalize the KMP_HW_SUBSET value if it is not an absolute subset.
1179*0fca6ea1SDimitry Andric   // This means putting each of {sockets, cores, threads} in the topology if
1180*0fca6ea1SDimitry Andric   // they are not specified:
1181*0fca6ea1SDimitry Andric   // e.g., 1s,2c => 1s,2c,*t | 2c,1t => *s,2c,1t | 1t => *s,*c,1t | etc.
1182*0fca6ea1SDimitry Andric   // e.g., 3module => *s,3module,*c,*t
1183*0fca6ea1SDimitry Andric   // By doing this, the runtime assumes users who fiddle with KMP_HW_SUBSET
1184*0fca6ea1SDimitry Andric   // are expecting the traditional sockets/cores/threads topology. For newer
1185*0fca6ea1SDimitry Andric   // hardware, there can be intervening layers like dies/tiles/modules
1186*0fca6ea1SDimitry Andric   // (usually corresponding to a cache level). So when a user asks for
1187*0fca6ea1SDimitry Andric   // 1s,6c,2t and the topology is really 1s,2modules,4cores,2threads, the user
1188*0fca6ea1SDimitry Andric   // should get 12 hardware threads across 6 cores and effectively ignore the
1189*0fca6ea1SDimitry Andric   // module layer.
1190*0fca6ea1SDimitry Andric   void canonicalize(const kmp_topology_t *top) {
1191*0fca6ea1SDimitry Andric     // Layers to target for KMP_HW_SUBSET canonicalization
1192*0fca6ea1SDimitry Andric     kmp_hw_t targeted[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};
1193*0fca6ea1SDimitry Andric 
1194*0fca6ea1SDimitry Andric     // Do not target-layer-canonicalize absolute KMP_HW_SUBSETS
1195*0fca6ea1SDimitry Andric     if (is_absolute())
1196*0fca6ea1SDimitry Andric       return;
1197*0fca6ea1SDimitry Andric 
1198*0fca6ea1SDimitry Andric     // Do not target-layer-canonicalize KMP_HW_SUBSETS when the
1199*0fca6ea1SDimitry Andric     // topology doesn't have these layers
1200*0fca6ea1SDimitry Andric     for (kmp_hw_t type : targeted)
1201*0fca6ea1SDimitry Andric       if (top->get_level(type) == KMP_HW_UNKNOWN)
1202*0fca6ea1SDimitry Andric         return;
1203*0fca6ea1SDimitry Andric 
1204*0fca6ea1SDimitry Andric     // Put targeted layers in topology if they do not exist
1205*0fca6ea1SDimitry Andric     for (kmp_hw_t type : targeted) {
1206*0fca6ea1SDimitry Andric       bool found = false;
1207*0fca6ea1SDimitry Andric       for (int i = 0; i < get_depth(); ++i) {
1208*0fca6ea1SDimitry Andric         if (top->get_equivalent_type(items[i].type) == type) {
1209*0fca6ea1SDimitry Andric           found = true;
1210*0fca6ea1SDimitry Andric           break;
1211*0fca6ea1SDimitry Andric         }
1212*0fca6ea1SDimitry Andric       }
1213*0fca6ea1SDimitry Andric       if (!found) {
1214*0fca6ea1SDimitry Andric         push_back(USE_ALL, type, 0, kmp_hw_attr_t{});
1215*0fca6ea1SDimitry Andric       }
1216*0fca6ea1SDimitry Andric     }
1217*0fca6ea1SDimitry Andric     sort();
1218*0fca6ea1SDimitry Andric     // Set as an absolute topology that only targets the targeted layers
1219*0fca6ea1SDimitry Andric     set_absolute();
1220*0fca6ea1SDimitry Andric   }
1221fe6060f1SDimitry Andric   void dump() const {
1222fe6060f1SDimitry Andric     printf("**********************\n");
1223fe6060f1SDimitry Andric     printf("*** kmp_hw_subset: ***\n");
1224fe6060f1SDimitry Andric     printf("* depth: %d\n", depth);
1225fe6060f1SDimitry Andric     printf("* items:\n");
1226fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i) {
12270eae32dcSDimitry Andric       printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
12280eae32dcSDimitry Andric       for (int j = 0; j < items[i].num_attrs; ++j) {
12290eae32dcSDimitry Andric         printf("  num: %d, offset: %d, attr: ", items[i].num[j],
12300eae32dcSDimitry Andric                items[i].offset[j]);
12310eae32dcSDimitry Andric         if (!items[i].attr[j]) {
12320eae32dcSDimitry Andric           printf(" (none)\n");
12330eae32dcSDimitry Andric         } else {
12340eae32dcSDimitry Andric           printf(
12350eae32dcSDimitry Andric               " core_type = %s, core_eff = %d\n",
12360eae32dcSDimitry Andric               __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
12370eae32dcSDimitry Andric               items[i].attr[j].get_core_eff());
12380eae32dcSDimitry Andric         }
12390eae32dcSDimitry Andric       }
1240fe6060f1SDimitry Andric     }
1241fe6060f1SDimitry Andric     printf("* set: 0x%llx\n", set);
1242fe6060f1SDimitry Andric     printf("* absolute: %d\n", absolute);
1243fe6060f1SDimitry Andric     printf("**********************\n");
1244fe6060f1SDimitry Andric   }
1245fe6060f1SDimitry Andric };
1246fe6060f1SDimitry Andric extern kmp_hw_subset_t *__kmp_hw_subset;
12470b57cec5SDimitry Andric 
12480b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
12490b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
12500b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
12510b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
12520b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
12530b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
12540b57cec5SDimitry Andric class hierarchy_info {
12550b57cec5SDimitry Andric public:
12560b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
12570b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
12580b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
12590b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
12600b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
12610b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
12620b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
12630b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
12640b57cec5SDimitry Andric       hierarchy each time we add a level. */
12650b57cec5SDimitry Andric   kmp_uint32 maxLevels;
12660b57cec5SDimitry Andric 
12670b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
12680b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
12690b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
12700b57cec5SDimitry Andric       all but one trailing 1. */
12710b57cec5SDimitry Andric   kmp_uint32 depth;
12720b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
12730b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
12740b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
12750b57cec5SDimitry Andric   // 2=initialization in progress
12760b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
12770b57cec5SDimitry Andric 
12780b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
12790b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
12800b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
12810b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
12820b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
12830b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
12840b57cec5SDimitry Andric 
1285fe6060f1SDimitry Andric   void deriveLevels() {
1286fe6060f1SDimitry Andric     int hier_depth = __kmp_topology->get_depth();
1287fe6060f1SDimitry Andric     for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1288fe6060f1SDimitry Andric       numPerLevel[level] = __kmp_topology->get_ratio(i);
12890b57cec5SDimitry Andric     }
12900b57cec5SDimitry Andric   }
12910b57cec5SDimitry Andric 
12920b57cec5SDimitry Andric   hierarchy_info()
12930b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
12940b57cec5SDimitry Andric 
12950b57cec5SDimitry Andric   void fini() {
12960b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
12970b57cec5SDimitry Andric       __kmp_free(numPerLevel);
12980b57cec5SDimitry Andric       numPerLevel = NULL;
12990b57cec5SDimitry Andric       uninitialized = not_initialized;
13000b57cec5SDimitry Andric     }
13010b57cec5SDimitry Andric   }
13020b57cec5SDimitry Andric 
1303fe6060f1SDimitry Andric   void init(int num_addrs) {
13040b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
13050b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
13060b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
13070b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
13080b57cec5SDimitry Andric         KMP_CPU_PAUSE();
13090b57cec5SDimitry Andric       return;
13100b57cec5SDimitry Andric     }
13110b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
13120b57cec5SDimitry Andric 
13130b57cec5SDimitry Andric     /* Added explicit initialization of the data fields here to prevent usage of
13140b57cec5SDimitry Andric        dirty value observed when static library is re-initialized multiple times
13150b57cec5SDimitry Andric        (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
13160b57cec5SDimitry Andric        OpenMP). */
13170b57cec5SDimitry Andric     depth = 1;
13180b57cec5SDimitry Andric     resizing = 0;
13190b57cec5SDimitry Andric     maxLevels = 7;
13200b57cec5SDimitry Andric     numPerLevel =
13210b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
13220b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
13230b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
13240b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
13250b57cec5SDimitry Andric       numPerLevel[i] = 1;
13260b57cec5SDimitry Andric       skipPerLevel[i] = 1;
13270b57cec5SDimitry Andric     }
13280b57cec5SDimitry Andric 
13290b57cec5SDimitry Andric     // Sort table by physical ID
1330fe6060f1SDimitry Andric     if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1331fe6060f1SDimitry Andric       deriveLevels();
13320b57cec5SDimitry Andric     } else {
13330b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
13340b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
13350b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
13360b57cec5SDimitry Andric         numPerLevel[1]++;
13370b57cec5SDimitry Andric     }
13380b57cec5SDimitry Andric 
13390b57cec5SDimitry Andric     base_num_threads = num_addrs;
13400b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
13410b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
13420b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
13430b57cec5SDimitry Andric         depth++;
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
13460b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
13470b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
13480b57cec5SDimitry Andric     if (branch < minBranch)
13490b57cec5SDimitry Andric       branch = minBranch;
13500b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
13510b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
13520b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
13530b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
13540b57cec5SDimitry Andric           numPerLevel[d]++;
13550b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
13560b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
13570b57cec5SDimitry Andric           depth++;
13580b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
13590b57cec5SDimitry Andric       }
13600b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
13610b57cec5SDimitry Andric         branch = branch >> 1;
13620b57cec5SDimitry Andric         if (branch < 4)
13630b57cec5SDimitry Andric           branch = minBranch;
13640b57cec5SDimitry Andric       }
13650b57cec5SDimitry Andric     }
13660b57cec5SDimitry Andric 
13670b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
13680b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
13690b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
13700b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
13710b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
13720b57cec5SDimitry Andric 
13730b57cec5SDimitry Andric     uninitialized = initialized; // One writer
13740b57cec5SDimitry Andric   }
13750b57cec5SDimitry Andric 
13760b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
13770b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
13780b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
13790b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
13800b57cec5SDimitry Andric       KMP_CPU_PAUSE();
13810b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
13820b57cec5SDimitry Andric         return;
13830b57cec5SDimitry Andric       else // try to resize
13840b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
13850b57cec5SDimitry Andric     }
13860b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
13870b57cec5SDimitry Andric     if (nproc <= base_num_threads)
13880b57cec5SDimitry Andric       return; // happy with other thread's resize
13890b57cec5SDimitry Andric 
13900b57cec5SDimitry Andric     // Calculate new maxLevels
13910b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
13920b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
13930b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
13940b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
13950b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
13960b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
13970b57cec5SDimitry Andric       old_sz *= 2;
13980b57cec5SDimitry Andric       depth++;
13990b57cec5SDimitry Andric     }
14000b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
14010b57cec5SDimitry Andric       while (nproc > old_sz) {
14020b57cec5SDimitry Andric         old_sz *= 2;
14030b57cec5SDimitry Andric         incs++;
14040b57cec5SDimitry Andric         depth++;
14050b57cec5SDimitry Andric       }
14060b57cec5SDimitry Andric       maxLevels += incs;
14070b57cec5SDimitry Andric 
14080b57cec5SDimitry Andric       // Resize arrays
14090b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
14100b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
14110b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
14120b57cec5SDimitry Andric       numPerLevel =
14130b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
14140b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
14150b57cec5SDimitry Andric 
14160b57cec5SDimitry Andric       // Copy old elements from old arrays
1417e8d8bef9SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1418e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
14190b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
14200b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
14210b57cec5SDimitry Andric       }
14220b57cec5SDimitry Andric 
14230b57cec5SDimitry Andric       // Init new elements in arrays to 1
1424e8d8bef9SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1425e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
14260b57cec5SDimitry Andric         numPerLevel[i] = 1;
14270b57cec5SDimitry Andric         skipPerLevel[i] = 1;
14280b57cec5SDimitry Andric       }
14290b57cec5SDimitry Andric 
14300b57cec5SDimitry Andric       // Free old arrays
14310b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
14320b57cec5SDimitry Andric     }
14330b57cec5SDimitry Andric 
14340b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
14350b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
14360b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
14370b57cec5SDimitry Andric 
14380b57cec5SDimitry Andric     base_num_threads = nproc;
14390b57cec5SDimitry Andric     resizing = 0; // One writer
14400b57cec5SDimitry Andric   }
14410b57cec5SDimitry Andric };
14420b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
1443