Lines Matching +full:layer +full:- +full:depth

2  * kmp_affinity.cpp -- affinity management
5 //===----------------------------------------------------------------------===//
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
62 return __kmp_topology->restrict_to_mask(mask);
79 kmp_uint32 depth;
89 depth = machine_hierarchy.depth;
90 KMP_DEBUG_ASSERT(depth > 0);
92 thr_bar->depth = depth;
93 __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
94 &(thr_bar->base_leaf_kids));
95 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
204 int depth = __kmp_topology->get_depth();
205 for (int level = 0; level < depth; ++level) {
206 if (ahwthread->ids[level] < bhwthread->ids[level])
207 return -1;
208 else if (ahwthread->ids[level] > bhwthread->ids[level])
211 if (ahwthread->os_id < bhwthread->os_id)
212 return -1;
213 else if (ahwthread->os_id > bhwthread->os_id)
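The comparator fragments above (source lines 204-213) order hardware threads lexicographically by their per-level topology ids, falling back to the OS id as a tie-breaker. A minimal standalone sketch of the same idea, using a hypothetical hw_thread_t with a std::vector of ids rather than the runtime's kmp_hw_thread_t:

    #include <vector>

    // Hypothetical stand-in for kmp_hw_thread_t: ids run from the outermost
    // topology level (e.g. socket) down to the innermost (thread).
    struct hw_thread_t {
      std::vector<int> ids;
      int os_id;
    };

    // Lexicographic compare: the first differing level decides; the OS id
    // breaks ties between otherwise identical threads.
    int compare_ids(const hw_thread_t &a, const hw_thread_t &b) {
      const int depth = (int)a.ids.size(); // both assumed to have equal depth
      for (int level = 0; level < depth; ++level) {
        if (a.ids[level] < b.ids[level])
          return -1;
        if (a.ids[level] > b.ids[level])
          return 1;
      }
      if (a.os_id < b.os_id)
        return -1;
      if (a.os_id > b.os_id)
        return 1;
      return 0;
    }
    // e.g. std::sort with a lambda wrapping compare_ids(x, y) < 0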
223 int depth = __kmp_topology->get_depth();
224 int compact = __kmp_topology->compact;
226 KMP_DEBUG_ASSERT(compact <= depth);
228 int j = depth - i - 1;
229 if (aa->sub_ids[j] < bb->sub_ids[j])
230 return -1;
231 if (aa->sub_ids[j] > bb->sub_ids[j])
234 for (; i < depth; i++) {
235 int j = i - compact;
236 if (aa->sub_ids[j] < bb->sub_ids[j])
237 return -1;
238 if (aa->sub_ids[j] > bb->sub_ids[j])
246 int depth = __kmp_topology->get_depth();
248 for (int i = 0; i < depth; ++i) {
265 // Add a layer to the topology based on the ids. Assume the topology
268 // Figure out where the layer should go by comparing the ids of the current
274 // Start from the highest layer and work down to find target layer
275 // If new layer is equal to another layer then put the new layer above
276 for (target_layer = 0; target_layer < depth; ++target_layer) {
283 // Found the layer we are strictly above
288 // Found a layer we are below. Move to next layer and check.
299 // Found the layer we are above. Now move everything to accommodate the new
300 // layer. And put the new ids and type into the topology.
301 for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
305 for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
310 depth++;
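The loops at source lines 301-310 make room for a new layer by shifting the existing entries down one slot before writing the new ids and type at target_layer. A compact sketch of that shift-and-insert pattern on a fixed-capacity array (MAX_LEVELS and insert_layer are illustrative names, not the runtime's):

    #include <cassert>

    const int MAX_LEVELS = 16; // illustrative capacity, similar in spirit to KMP_HW_LAST

    // Insert 'value' at position 'target' within the first 'depth' slots of
    // 'ids', shifting everything at or below 'target' down by one slot.
    void insert_layer(int ids[MAX_LEVELS], int &depth, int target, int value) {
      assert(depth < MAX_LEVELS && target >= 0 && target <= depth);
      for (int i = depth - 1, j = depth; i >= target; --i, --j)
        ids[j] = ids[i]; // make room by moving deeper layers down
      ids[target] = value;
      depth++;
    }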
332 __kmp_topology->sort_ids();
337 // This is done by having the layer take on the id = UNKNOWN_ID (-1)
356 while (top_index1 < depth - 1 && top_index2 < depth) {
388 // Select the layer to remove based on preference
400 // If all the indexes for the second (deeper) layer are the same
401 // (e.g., all are zero), then make sure to keep the first layer's ids
405 // the hw threads and removing the layer from types and depth
409 for (int d = remove_layer_ids; d < depth - 1; ++d)
412 for (int idx = remove_layer; idx < depth - 1; ++idx)
414 depth--;
419 KMP_ASSERT(depth > 0);
450 // Gather the count of each topology layer and the ratio
455 for (int i = 0; i < depth; ++i) {
464 for (int layer = 0; layer < depth; ++layer) {
465 int id = hw_thread.ids[layer];
466 if (id != previous_id[layer]) {
468 for (int l = layer; l < depth; ++l)
470 // Keep track of topology layer ratio statistics
471 max[layer]++;
472 for (int l = layer + 1; l < depth; ++l) {
479 if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level) {
482 // Because efficiencies can range from 0 to max efficiency - 1,
503 for (int layer = 0; layer < depth; ++layer) {
504 previous_id[layer] = hw_thread.ids[layer];
507 for (int layer = 0; layer < depth; ++layer) {
508 if (max[layer] > ratio[layer])
509 ratio[layer] = max[layer];
518 for (int i = 0; i < depth; ++i)
522 above_level = -1;
541 for (int level = 0; level < depth; ++level)
552 for (int level = 0; level < depth; ++level)
554 flags.uniform = (num == count[depth - 1]);
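The pass over hardware threads at source lines 455-509 relies on the table being sorted: whenever an id changes at some level, a new object begins there and at every deeper level, so count[l] is bumped for all l >= layer, while max[layer] tracks how many children the current parent has produced and its peak becomes ratio[layer]. A self-contained sketch of that counting scheme over sorted id tuples (topo_stats and gather are illustrative names):

    #include <vector>

    // Per-level statistics gathered from hardware-thread id tuples that are
    // sorted lexicographically (outermost level first):
    //   count[l] = number of distinct objects at level l
    //   ratio[l] = maximum number of level-l objects under one level-(l-1)
    //              parent (for l == 0, the total number of top-level objects)
    struct topo_stats {
      std::vector<int> count, ratio;
    };

    topo_stats gather(const std::vector<std::vector<int>> &ids, int depth) {
      topo_stats s;
      s.count.assign(depth, 0);
      s.ratio.assign(depth, 0);
      std::vector<int> previous(depth, -1), max_children(depth, 0);
      for (const std::vector<int> &t : ids) {
        for (int layer = 0; layer < depth; ++layer) {
          if (t[layer] != previous[layer]) {
            // A new object starts at this level and at every deeper level.
            for (int l = layer; l < depth; ++l)
              s.count[l]++;
            max_children[layer]++;   // one more child under the same parent
            for (int l = layer + 1; l < depth; ++l)
              max_children[l] = 1;   // deeper levels restart their child count
            break;                   // deeper levels were already handled
          }
        }
        for (int layer = 0; layer < depth; ++layer) {
          previous[layer] = t[layer];
          if (max_children[layer] > s.ratio[layer])
            s.ratio[layer] = max_children[layer];
        }
      }
      return s;
    }
    // e.g. two sockets x two cores x two threads yields
    // count = {2, 4, 8} and ratio = {2, 2, 2}.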
562 for (int i = 0; i < depth; ++i) {
563 previous_id[i] = -1;
564 sub_id[i] = -1;
569 for (int j = 0; j < depth; ++j) {
572 for (int k = j + 1; k < depth; ++k) {
579 for (int j = 0; j < depth; ++j) {
583 for (int j = 0; j < depth; ++j) {
594 if (package_level == -1)
600 KMP_ASSERT(core_level != -1);
601 KMP_ASSERT(thread_level != -1);
604 if (package_level != -1) {
626 retval->hw_threads = (kmp_hw_thread_t *)(bytes + sizeof(kmp_topology_t));
628 retval->hw_threads = nullptr;
630 retval->num_hw_threads = nproc;
631 retval->depth = ndepth;
634 retval->types = (kmp_hw_t *)arr;
635 retval->ratio = arr + (size_t)KMP_HW_LAST;
636 retval->count = arr + 2 * (size_t)KMP_HW_LAST;
637 retval->num_core_efficiencies = 0;
638 retval->num_core_types = 0;
639 retval->compact = 0;
641 retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
642 KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
644 retval->types[i] = types[i];
645 retval->equivalent[types[i]] = types[i];
661 kmp_hw_thread_t &previous_thread = hw_threads[i - 1];
663 for (int j = 0; j < depth; ++j) {
680 printf("* depth: %d\n", depth);
683 for (int i = 0; i < depth; ++i)
688 for (int i = 0; i < depth; ++i) {
694 for (int i = 0; i < depth; ++i) {
710 printf("%-15s -> %-15s\n", key, value);
754 KMP_ASSERT(depth > 0 && depth <= (int)KMP_HW_LAST);
758 for (int level = 0; level < depth; ++level)
762 if (print_types[print_types_depth - 1] == KMP_HW_THREAD) {
765 print_types[print_types_depth - 1] = KMP_HW_CORE;
835 for (int level = 0; level < depth; ++level) {
858 // OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
864 affinity.gran_levels = -1;
869 // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
881 affinity.gran_levels = -1;
926 for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
946 if (get_level(KMP_HW_L2) != -1)
948 else if (get_level(KMP_HW_TILE) != -1)
954 KMP_ASSERT(depth > 0);
955 for (int level = 0; level < depth; ++level) {
968 depth = ndepth;
970 for (int level = 0; level < depth; ++level) {
1018 __kmp_avail_proc--;
1036 __kmp_affin_origMask->copy(__kmp_affin_fullMask);
1050 __kmp_hw_subset->sort();
1052 __kmp_hw_subset->canonicalize(__kmp_topology);
1057 bool is_absolute = __kmp_hw_subset->is_absolute();
1058 int hw_subset_depth = __kmp_hw_subset->get_depth();
1066 const kmp_hw_subset_t::item_t &item = __kmp_hw_subset->at(i);
1074 // Check to see if current layer is in detected machine topology
1076 __kmp_hw_subset->at(i).type = equivalent_type;
1083 // Check to see if current layer has already been
1093 // Check to see if each layer's num & offset parameters are valid
1115 // Check if using a single core attribute on non-hybrid arch.
1118 // Check if using multiple core attributes on non-hybrid arch.
1155 KMP_HNT(ValidValuesRange, 0, num_core_efficiencies - 1),
1169 int level_above = core_level - 1;
1228 abs_sub_ids[i] = -1;
1229 prev_sub_ids[i] = -1;
1232 core_eff_sub_ids[i] = -1;
1234 core_type_sub_ids[i] = -1;
1238 // Helpful to determine if a topology layer is targeted by an absolute subset
1246 // If not absolute KMP_HW_SUBSET, then every layer is seen as targeted
1312 const auto &hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
1314 if (level == -1)
1385 if (hw_level >= depth)
1394 for (int i = 0; i < (depth - hw_level); ++i) {
1445 // The format is a comma separated list of non-negative integers or integer
1446 // ranges: e.g., 1,2,3-5,7,9-15
1456 char *end = buf + buf_len - 1;
1459 if (mask->begin() == mask->end()) {
1460 KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
1467 start = mask->begin();
1471 for (finish = mask->next(start), previous = start;
1472 finish == previous + 1 && finish != mask->end();
1473 finish = mask->next(finish)) {
1480 KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
1486 if (previous - start > 1) {
1487 KMP_SNPRINTF(scan, end - scan + 1, "%u-%u", start, previous);
1490 KMP_SNPRINTF(scan, end - scan + 1, "%u", start);
1492 if (previous - start > 0) {
1493 KMP_SNPRINTF(scan, end - scan + 1, ",%u", previous);
1499 if (start == mask->end())
1502 if (end - scan < 2)
1513 // The format is a comma separated list of non-negative integers or integer
1514 // ranges: e.g., 1,2,3-5,7,9-15
1526 if (mask->begin() == mask->end()) {
1532 start = mask->begin();
1536 for (finish = mask->next(start), previous = start;
1537 finish == previous + 1 && finish != mask->end();
1538 finish = mask->next(finish)) {
1550 if (previous - start > 1) {
1551 __kmp_str_buf_print(buf, "%u-%u", start, previous);
1555 if (previous - start > 0) {
1561 if (start == mask->end())
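Both mask printers above (source lines 1445-1561) emit a comma-separated list of non-negative integers and ranges such as 1,2,3-5,7,9-15 by folding maximal runs of consecutive ids. A standalone sketch of that run-folding step over a sorted list of CPU ids, using std::string instead of the runtime's bounded buffers:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Fold a sorted list of distinct CPU ids into "1,2,3-5,7,9-15" style text.
    // Runs of length 1 print as "n", length 2 as "n,m", length >= 3 as "n-m".
    std::string mask_to_string(const std::vector<unsigned> &cpus) {
      if (cpus.empty())
        return "{<empty>}";
      std::string out;
      char tmp[32];
      size_t i = 0;
      while (i < cpus.size()) {
        unsigned start = cpus[i];
        unsigned previous = start;
        // Extend the run while the ids stay consecutive.
        while (i + 1 < cpus.size() && cpus[i + 1] == previous + 1)
          previous = cpus[++i];
        if (!out.empty())
          out += ",";
        if (previous - start > 1)
          std::snprintf(tmp, sizeof(tmp), "%u-%u", start, previous);
        else if (previous - start > 0)
          std::snprintf(tmp, sizeof(tmp), "%u,%u", start, previous);
        else
          std::snprintf(tmp, sizeof(tmp), "%u", start);
        out += tmp;
        ++i;
      }
      return out;
    }
    // e.g. {0,1,2,3,5,7,8,9} -> "0-3,5,7-9"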
1585 // e.g., 1,2,4-7,9,11-15
1599 } else if (c == '-') {
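The dash handling at source line 1599 belongs to scanning the same syntax in the other direction, expanding text like 1,2,4-7,9,11-15 back into individual ids. A minimal sketch of such a parser; it deliberately leaves out the stride syntax and error reporting that the runtime's explicit proc-list handling provides:

    #include <cstdlib>
    #include <vector>

    // Expand "1,2,4-7,9,11-15" into individual non-negative ids.
    std::vector<int> parse_id_list(const char *s) {
      std::vector<int> ids;
      while (*s) {
        char *end;
        long start = std::strtol(s, &end, 10);
        if (end == s)
          break;                 // not a number: stop quietly
        long finish = start;
        if (*end == '-')         // a range: read the upper bound
          finish = std::strtol(end + 1, &end, 10);
        for (long id = start; id <= finish; ++id)
          ids.push_back((int)id);
        if (*end == ',')
          ++end;                 // skip the separator
        s = end;
      }
      return ids;
    }
    // e.g. parse_id_list("1,2,4-7") -> {1, 2, 4, 5, 6, 7}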
1664 // internal topology object and set the layer ids for it. Each routine
1673 return hwloc_obj_type_is_cache(obj->type);
1675 return obj->type == HWLOC_OBJ_CACHE;
1683 if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
1685 switch (obj->attr->cache.depth) {
1701 switch (obj->type) {
1712 if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE)
1714 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_TILE)
1716 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_MODULE)
1718 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP)
1738 for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
1739 obj->logical_index, type, 0);
1741 obj->type, first) == obj;
1742 first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
1754 hwloc_obj_type_t ltype = lower->type;
1755 int lindex = lower->logical_index - 1;
1760 hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
1761 if (obj->userdata) {
1762 sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
1766 lindex--;
1770 lower->userdata = RCAST(void *, sub_id + 1);
1777 int depth;
1808 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1829 cpukinds[idx].efficiency = -1;
1855 // Figure out the depth and types in the topology
1856 depth = 0;
1857 obj = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
1860 if (obj->memory_arity) {
1862 for (memory = obj->memory_first_child; memory;
1864 if (memory->type == HWLOC_OBJ_NUMANODE)
1867 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
1868 types[depth] = KMP_HW_NUMA;
1869 hwloc_types[depth] = memory->type;
1870 depth++;
1876 types[depth] = type;
1877 hwloc_types[depth] = obj->type;
1878 depth++;
1880 obj = obj->parent;
1882 KMP_ASSERT(depth > 0);
1885 for (int i = 0, j = depth - 1; i < j; ++i, --j) {
1895 __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1900 int index = depth - 1;
1901 bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
1902 kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
1905 hw_thread.ids[index] = pu->logical_index;
1906 hw_thread.os_id = pu->os_index;
1910 int cpukind_index = -1;
1923 index--;
1928 obj = obj->parent;
1933 if (obj->memory_arity) {
1935 for (memory = obj->memory_first_child; memory;
1937 if (memory->type == HWLOC_OBJ_NUMANODE)
1940 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
1943 hw_thread.ids[index] = memory->logical_index;
1945 index--;
1956 hw_thread.ids[index] = obj->logical_index;
1958 index--;
1975 __kmp_topology->sort_ids();
1982 // mapping of os thread id's <-> processor id's.
1985 int depth = 3;
2010 __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
2018 kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct);
2033 // If multiple Windows* OS processor groups exist, we can create a 2-level
2039 int depth = 3;
2058 __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
2066 kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct++);
2080 const kmp_uint32 SHIFT_LEFT = sizeof(kmp_uint32) * 8 - 1 - MSB;
2111 if (aa->pkgId < bb->pkgId)
2112 return -1;
2113 if (aa->pkgId > bb->pkgId)
2115 if (aa->coreId < bb->coreId)
2116 return -1;
2117 if (aa->coreId > bb->coreId)
2119 if (aa->threadId < bb->threadId)
2120 return -1;
2121 if (aa->threadId > bb->threadId)
2131 kmp_cache_info_t() : depth(0) { get_leaf4_levels(); }
2132 size_t get_depth() const { return depth; }
2152 size_t depth;
2157 while (depth < MAX_CACHE_LEVEL) {
2173 table[depth].level = cache_level;
2174 table[depth].mask = ((-1) << cache_mask_width);
2175 depth++;
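The mask built at source line 2174 keeps only the APIC-id bits that identify which cache a logical processor is attached to: two hardware threads share that cache level exactly when their APIC ids agree under the mask. A small sketch with made-up values (in the runtime the width would be derived from the CPUID leaf 4 count of logical processors sharing the cache):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Made-up width: e.g. up to 8 logical processors share the cache.
      unsigned cache_mask_width = 3;
      std::uint32_t mask = ~0u << cache_mask_width; // keep the "which cache" bits

      std::uint32_t apic_a = 0x12, apic_b = 0x15, apic_c = 0x1a;
      // Two threads share the cache iff their APIC ids agree under the mask.
      std::printf("a,b share: %d\n", (apic_a & mask) == (apic_b & mask)); // 1
      std::printf("a,c share: %d\n", (apic_a & mask) == (apic_c & mask)); // 0
      return 0;
    }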
2181 // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
2203 // need to do something else - use the defaults that we calculated from
2245 // - Older OSes are usually found on machines with older chips, which do not
2247 // - The performance penalty for mistakenly identifying a machine as HT when
2253 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
2269 // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
2271 // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
2276 // Hyper-Threading Technology is supported by the chip but has been
2278 // On other OS/chip combinations supporting Intel(R) Hyper-Threading
2280 // Hyper-Threading Technology is disabled and 2 when it is enabled.
2281 // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
2284 // bound, but the IA-32 architecture manual says that it is exactly the
2301 __kmp_affinity_dispatch->bind_thread(i);
2336 int widthT = widthCT - widthC;
2346 int maskC = (1 << widthC) - 1;
2349 int maskT = (1 << widthT) - 1;
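With the cpuid(1)/cpuid(4) method, the APIC id is split into thread, core, and package fields using the widths computed above: the low widthT bits give the thread id, the next widthC bits the core id, and the remaining high bits the package id. A hedged sketch of that decode with example widths (the runtime derives widthCT from the maximum logical processors per package and widthC from the maximum cores per package, per the comments above; here they are hard-coded):

    #include <cstdio>

    // Decode an initial APIC id into (pkgId, coreId, threadId) given the
    // field widths; the widths below are example values only.
    int main() {
      int widthCT = 4;               // bits for core+thread within a package
      int widthC = 3;                // bits for the core id
      int widthT = widthCT - widthC; // low bits that identify the thread
      int maskC = (1 << widthC) - 1;
      int maskT = (1 << widthT) - 1;

      unsigned apicId = 0x2b;        // example: package 2, core 5, thread 1
      int pkgId = (int)(apicId >> widthCT);
      int coreId = (int)(apicId >> widthT) & maskC;
      int threadId = (int)apicId & maskT;
      std::printf("pkg=%d core=%d thread=%d\n", pkgId, coreId, threadId);
      return 0;
    }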
2366 // [0 .. coresPerPkg-1] and threadId's are usually assigned
2367 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2370 // total # packages) are at this point - we want to determine that now. We
2388 // intra-pkg consistency checks
2457 //(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
2458 int depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
2467 KMP_ASSERT(depth > 0);
2468 __kmp_topology = kmp_topology_t::allocate(nApics, depth, types);
2473 kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
2489 __kmp_topology->sort_ids();
2490 if (!__kmp_topology->check_ids()) {
2525 [comment diagram spanning source lines 2525-2534: 32-bit register bit-field layout with columns 31-16 | 15-8 | 7-4 | 4-0]
2604 // If it is an unknown level, then logically move the previous layer up
2606 levels[levels_index - 1].mask_width = mask_width;
2607 levels[levels_index - 1].nitems = nitems;
2613 // Ensure the INTEL_LEVEL_TYPE_INVALID (Socket) layer isn't first
2620 levels[i].mask = ~((-1) << levels[i].mask_width);
2621 levels[i].cache_mask = (-1) << levels[i].mask_width;
2626 levels[i].mask = (-1) << levels[i - 1].mask_width;
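With cpuid leaf 0x1F/0xB each topology level reports a cumulative mask width, so per-level masks are formed as above and a level id is recovered by masking the x2apic id and shifting off the width of the level below it (as at source lines 2786-2789). A short sketch of that extraction with made-up widths:

    #include <cstdint>
    #include <cstdio>

    // Cumulative mask widths as reported by cpuid leaf 0x1F/0xB, innermost
    // level first (example values: 1 SMT bit, then 4 core bits; whatever is
    // left identifies the package).
    int main() {
      int mask_width[2] = {1, 5};              // cumulative widths
      std::uint32_t x2apic = 0x57;             // example id: 0b1010111

      std::uint32_t mask[2];
      for (int i = 0; i < 2; ++i)
        mask[i] = ~(~0u << mask_width[i]);     // bits of this level and below

      std::uint32_t thread_id = x2apic & mask[0];
      std::uint32_t core_id = (x2apic & mask[1]) >> mask_width[0];
      std::uint32_t pkg_id = x2apic >> mask_width[1];

      std::printf("thread=%u core=%u package=%u\n", thread_id, core_id, pkg_id);
      return 0;
    }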
2685 topology_leaf = -1;
2699 if (topology_leaf == -1 || levels_index == 0) {
2708 // we need to do something else - use the defaults that we calculated from
2722 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
2727 int depth = levels_index;
2728 for (int i = depth - 1, j = 0; i >= 0; --i, ++j)
2742 if (hw_cache_mask == cache_mask && j < levels_index - 1) {
2745 __kmp_topology->set_equivalent_type(cache_type, type);
2771 __kmp_affinity_dispatch->bind_thread(proc);
2776 kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
2786 for (unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
2789 hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
2804 __kmp_topology->sort_ids();
2805 if (!__kmp_topology->check_ids()) {
2829 for (i = maxIndex;; i--) {
2831 return -1;
2887 // Return the index into the hierarchy for this tid and layer type (L1, L2, etc)
2956 buf[sizeof(buf) - 1] = 1;
2963 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2968 // FIXME - this will match "node_<n> <garbage>"
3127 buf[sizeof(buf) - 1] = 1;
3143 } else if (!buf[sizeof(buf) - 1]) {
3182 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
3184 char *p = strchr(buf + sizeof(s1) - 1, ':');
3232 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
3234 char *p = strchr(buf + sizeof(s2) - 1, ':');
3244 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
3246 char *p = strchr(buf + sizeof(s3) - 1, ':');
3257 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
3259 char *p = strchr(buf + sizeof(s4) - 1, ':');
3271 char *p = strchr(buf + sizeof(s4) - 1, ':');
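The cpuinfo method around source lines 3127-3271 reads the file line by line, matching key prefixes and taking the integer after the ':'. A simplified sketch of that parsing style; it keeps only three hypothetical keys and none of the runtime's validation or extra fields:

    #include <cstdlib>
    #include <cstring>
    #include <fstream>
    #include <string>
    #include <vector>

    struct cpu_rec { int proc = -1, pkg = -1, core = -1; };

    // Read /proc/cpuinfo-style text: records are separated by blank lines and
    // each line looks like "key<tab>: value". Only three keys are kept here.
    std::vector<cpu_rec> parse_cpuinfo(const char *path) {
      std::vector<cpu_rec> cpus;
      std::ifstream in(path);
      std::string line;
      cpu_rec cur;
      while (std::getline(in, line)) {
        if (line.empty()) {                   // a blank line ends the record
          if (cur.proc >= 0)
            cpus.push_back(cur);
          cur = cpu_rec();
          continue;
        }
        const char *buf = line.c_str();
        const char *colon = std::strchr(buf, ':');
        if (!colon)
          continue;
        int val = std::atoi(colon + 1);
        if (std::strncmp(buf, "processor", 9) == 0)
          cur.proc = val;
        else if (std::strncmp(buf, "physical id", 11) == 0)
          cur.pkg = val;
        else if (std::strncmp(buf, "core id", 7) == 0)
          cur.core = val;
      }
      if (cur.proc >= 0)                      // the last record may not be
        cpus.push_back(cur);                  // followed by a blank line
      return cpus;
    }
    // e.g. parse_cpuinfo("/proc/cpuinfo")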
3286 // leading tokens that we don't recognize - if the line isn't empty, go on
3369 // [0 .. coresPerPkg-1] and threadId's are usually assigned
3370 // [0..threadsPerCore-1], we don't want to make any such assumptions.
3373 // total # packages) are at this point - we want to determine that now. We
3411 for (index = maxIndex; index >= threadIdIndex; index--) {
3413 // Auto-assign the thread id field if it wasn't specified.
3445 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
3451 // Auto-assign the thread id field if it wasn't specified.
3468 // Also, check that we haven't already restarted the loop (to be safe -
3490 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
3540 int depth = 0;
3543 int pkgLevel = -1;
3544 int coreLevel = -1;
3545 int threadLevel = -1;
3548 depth++;
3563 KMP_ASSERT(depth > 0);
3566 __kmp_topology = kmp_topology_t::allocate(num_avail, depth, types);
3571 kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
3576 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
3596 __kmp_topology->sort_ids();
3597 if (!__kmp_topology->check_ids()) {
3616 int numAddrs = __kmp_topology->get_num_hw_threads();
3617 int depth = __kmp_topology->get_depth();
3620 KMP_ASSERT(depth);
3622 i = find_next(-1);
3629 for (i = numAddrs - 1;; --i) {
3630 int osId = __kmp_topology->at(i).os_id;
3643 if (affinity.gran_levels >= (int)depth) {
3658 i = j = leader = find_next(-1);
3659 KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
3665 if (__kmp_topology->is_close(leader, i, affinity)) {
3666 KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
3673 int osId = __kmp_topology->at(j).os_id;
3677 __kmp_topology->at(j).leader = (j == leader);
3685 KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
3691 int osId = __kmp_topology->at(j).os_id;
3695 __kmp_topology->at(j).leader = (j == leader);
3703 __kmp_topology->print(env_var);
3710 // as file-static than to try and pass them through the calling sequence of
3711 // the recursive-descent OMP_PLACES parser.
3745 // Re-parse the proclist (for the explicit affinity type), and form the list
3753 int maxOsId = affinity.num_os_id_masks - 1;
3849 if (*next != '-') {
3860 // This is a range. Skip over the '-' and read in the 2nd int.
3861 next++; // skip '-'
3878 if (*next == '-') {
3879 sign = -1;
3898 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
3937 /*-----------------------------------------------------------------------------
3938 Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3955 signed := - signed
3956 -----------------------------------------------------------------------------*/
4035 if (**scan == '-') {
4036 sign *= -1;
4037 (*scan)++; // skip '-'
4118 int maxOsId = affinity.num_os_id_masks - 1;
4183 if (*scan == '-') {
4184 sign *= -1;
4185 scan++; // skip '-'
4218 if (i < count - 1) {
4269 const kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
4270 for (int j = bottom_level; j > 0; j--) {
4272 if (core_level < (j - 1)) {
4273 core_level = j - 1;
4284 return __kmp_topology->get_count(core_level);
4290 KMP_DEBUG_ASSERT(proc >= 0 && proc < __kmp_topology->get_num_hw_threads());
4294 if (__kmp_topology->at(i + 1).sub_ids[j] !=
4295 __kmp_topology->at(i).sub_ids[j]) {
4311 int thread_level = __kmp_topology->get_level(KMP_HW_THREAD);
4312 return __kmp_topology->calculate_ratio(thread_level, core_level);
4333 int depth = __kmp_topology->get_depth();
4337 const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx);
4338 for (int level = 0; level < depth; ++level) {
4339 kmp_hw_t type = __kmp_topology->get_type(level);
4347 for (; level < depth; ++level) {
4348 kmp_hw_t type = __kmp_topology->get_type(level);
4370 const kmp_affin_mask_t *mask = th->th.th_affin_mask;
4371 kmp_affinity_ids_t &ids = th->th.th_topology_ids;
4372 kmp_affinity_attrs_t &attrs = th->th.th_topology_attrs;
4390 int max_cpu = __kmp_affin_fullMask->get_max_cpu();
4391 int num_hw_threads = __kmp_topology->get_num_hw_threads();
4410 int os_id = __kmp_topology->at(hw_thread).os_id;
4426 if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
4427 machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
4440 KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
4449 // Create the "full" mask - this defines all of the processors that we
4469 __kmp_affin_origMask->copy(__kmp_affin_fullMask);
4505 __kmp_affin_origMask->copy(__kmp_affin_fullMask);
4509 __kmp_affin_fullMask->set_process_affinity(true);
4529 // In the default code path, errors are not fatal - we just try using
4534 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4594 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4660 __kmp_topology->canonicalize(nPackages, nCoresPerPkg,
4663 __kmp_topology->print(env_var);
4670 __kmp_topology->canonicalize();
4672 __kmp_topology->print(env_var);
4673 bool filtered = __kmp_topology->filter_hw_subset();
4675 __kmp_topology->print("KMP_HW_SUBSET");
4695 KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
4714 __kmp_topology->set_granularity(affinity);
4715 int depth = __kmp_topology->get_depth();
4719 int numAddrs = __kmp_topology->get_num_hw_threads();
4724 KMP_ASSERT(idx >= -1);
4726 if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
4745 KMP_ASSERT(idx >= -1);
4787 if (affinity.compact >= depth) {
4800 if (affinity.compact >= depth) {
4803 affinity.compact = depth - 1 - affinity.compact;
4808 if (affinity.compact >= depth) {
4809 affinity.compact = depth - 1;
4814 if (depth <= 1 || is_hidden_helper_affinity) {
4820 } else if (!__kmp_topology->is_uniform()) {
4821 // Save the depth for further usage
4822 __kmp_aff_depth = depth;
4825 __kmp_affinity_find_core_level(__kmp_avail_proc, depth - 1);
4826 int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc, depth - 1,
4829 __kmp_avail_proc, depth - 1, core_level);
4842 procarr[i] = -1;
4845 int lastcore = -1;
4848 int proc = __kmp_topology->at(i).os_id;
4849 int core = __kmp_affinity_find_core(i, depth - 1, core_level);
4861 if (affinity.compact >= depth) {
4862 affinity.compact = depth - 1;
4866 // Allocate the gtid->affinity mask table.
4884 __kmp_topology->sort_compact(affinity);
4888 int num_hw_threads = __kmp_topology->get_num_hw_threads();
4891 if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
4894 int osId = __kmp_topology->at(i).os_id;
4908 __kmp_topology->print(env_var);
4912 __kmp_topology->sort_ids();
4943 if (affinity->masks != NULL)
4944 KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
4945 if (affinity->os_id_masks != NULL)
4946 KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
4947 if (affinity->proclist != NULL)
4948 __kmp_free(affinity->proclist);
4949 if (affinity->ids != NULL)
4950 __kmp_free(affinity->ids);
4951 if (affinity->attrs != NULL)
4952 __kmp_free(affinity->attrs);
4953 *affinity = KMP_AFFINITY_INIT(affinity->env_var);
5000 mask_idx = gtid - 2;
5003 KMP_DEBUG_ASSERT(affinity->num_masks > 0);
5004 *place = (mask_idx + affinity->offset) % affinity->num_masks;
5005 *mask = KMP_CPU_INDEX(affinity->masks, *place);
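The fragment above assigns each thread a place round-robin: a per-thread index (derived here from the gtid) plus the user-specified offset, modulo the number of place masks. The arithmetic, as a tiny sketch:

    #include <cstdio>

    // Round-robin place selection: a 0-based thread index plus the user
    // offset, modulo the number of places.
    int pick_place(int thread_idx, int offset, int num_places) {
      return (thread_idx + offset) % num_places;
    }

    int main() {
      // e.g. 4 places with offset 1: threads 0..4 land on places 1,2,3,0,1
      for (int t = 0; t < 5; ++t)
        std::printf("thread %d -> place %d\n", t, pick_place(t, 1, 4));
      return 0;
    }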
5008 // This function initializes the per-thread data concerning affinity including
5016 th->th.th_topology_ids.ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
5017 th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
5023 if (th->th.th_affin_mask == NULL) {
5024 KMP_CPU_ALLOC(th->th.th_affin_mask);
5026 KMP_CPU_ZERO(th->th.th_affin_mask);
5045 if ((affinity->type == affinity_none) ||
5046 (affinity->type == affinity_balanced) ||
5074 th->th.th_current_place = i;
5076 th->th.th_new_place = i;
5077 th->th.th_first_place = 0;
5078 th->th.th_last_place = affinity->num_masks - 1;
5080 // When using a Non-OMP_PROC_BIND affinity method,
5081 // set all threads' place-partition-var to the entire place list
5082 th->th.th_first_place = 0;
5083 th->th.th_last_place = affinity->num_masks - 1;
5087 th->th.th_topology_ids = __kmp_affinity.ids[i];
5088 th->th.th_topology_attrs = __kmp_affinity.attrs[i];
5099 KMP_CPU_COPY(th->th.th_affin_mask, mask);
5117 if (affinity->flags.verbose && (affinity->type == affinity_none ||
5118 (th->th.th_current_place != KMP_PLACE_ALL &&
5119 affinity->type != affinity_balanced)) &&
5123 th->th.th_affin_mask);
5132 if (affinity->type == affinity_none) {
5133 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
5138 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
5152 gtid, th->th.th_new_place, th->th.th_current_place));
5155 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
5156 KMP_ASSERT(th->th.th_new_place >= 0);
5157 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks);
5158 if (th->th.th_first_place <= th->th.th_last_place) {
5159 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
5160 (th->th.th_new_place <= th->th.th_last_place));
5162 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
5163 (th->th.th_new_place >= th->th.th_last_place));
5169 KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
5170 KMP_CPU_COPY(th->th.th_affin_mask, mask);
5171 th->th.th_current_place = th->th.th_new_place;
5176 th->th.th_affin_mask);
5180 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
5189 return -1;
5232 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
5235 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
5238 th->th.th_current_place = KMP_PLACE_UNDEFINED;
5239 th->th.th_new_place = KMP_PLACE_UNDEFINED;
5240 th->th.th_first_place = 0;
5241 th->th.th_last_place = __kmp_affinity.num_masks - 1;
5244 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
5256 return -1;
5265 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
5271 th->th.th_affin_mask);
5300 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
5320 return -1;
5341 return -1;
5344 return -2;
5353 return -1;
5374 return -1;
5377 return -2;
5386 return -1;
5407 return -1;
5420 int high = __kmp_topology->get_num_hw_threads() - 1;
5422 while (high - low > 1) {
5424 if (__kmp_topology->at(mid).attrs.get_core_type() ==
5431 if (__kmp_topology->at(mid).attrs.get_core_type() == KMP_HW_CORE_TYPE_ATOM) {
5434 return -1;
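The loop at source lines 5420-5434 binary-searches the sorted hardware-thread table for the boundary between the two core types on a hybrid CPU. A standalone sketch of the same kind of boundary search, written as a conventional lower bound over an array of illustrative core-type tags:

    #include <cstdio>
    #include <vector>

    enum core_type { CORE_PERF, CORE_EFF }; // illustrative tags only

    // Hardware threads sorted so that all performance cores precede the
    // efficiency cores: binary-search the index of the first efficiency
    // core, or return -1 if there is none.
    int first_efficiency_index(const std::vector<core_type> &types) {
      int low = 0, high = (int)types.size();
      while (low < high) {
        int mid = low + (high - low) / 2;
        if (types[mid] == CORE_PERF)
          low = mid + 1;  // boundary lies to the right of mid
        else
          high = mid;     // mid could be the first efficiency core
      }
      return (low < (int)types.size()) ? low : -1;
    }

    int main() {
      std::vector<core_type> t = {CORE_PERF, CORE_PERF, CORE_PERF,
                                  CORE_EFF, CORE_EFF};
      std::printf("first efficiency core at index %d\n",
                  first_efficiency_index(t)); // prints 3
      return 0;
    }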
5438 // Dynamic affinity settings - Affinity balanced
5442 int tid = th->th.th_info.ds.ds_tid;
5466 if (__kmp_topology->is_uniform()) {
5479 // How many cores will have an additional thread bound to it - "big cores"
5487 coreID = (tid - big_cores) / chunk;
5488 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
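For a uniform topology, balanced affinity spreads nthreads across ncores as evenly as possible: big_cores cores receive chunk + 1 threads and the rest receive chunk, and the expressions at source lines 5487-5488 map a thread id in the second region back to its core and in-core slot. A hedged arithmetic sketch of that mapping; only the second branch appears in the fragments above, so the first branch is an inferred counterpart:

    #include <cstdio>

    // Map a thread id onto (core, slot within core) when nthreads threads are
    // spread over ncores cores as evenly as possible; nth_per_core bounds the
    // slot index.
    void balanced_map(int tid, int nthreads, int ncores, int nth_per_core,
                      int *coreID, int *threadID) {
      int chunk = nthreads / ncores;     // every core gets at least this many
      int big_cores = nthreads % ncores; // these cores get one extra thread
      int big_nth = (chunk + 1) * big_cores;
      if (tid < big_nth) {               // thread lands on one of the big cores
        *coreID = tid / (chunk + 1);
        *threadID = (tid % (chunk + 1)) % nth_per_core;
      } else {                           // thread lands on a regular core
        *coreID = (tid - big_cores) / chunk;
        *threadID = ((tid - big_cores) % chunk) % nth_per_core;
      }
    }

    int main() {
      // Example: 10 threads on 4 cores with 2 hw threads per core:
      // cores 0 and 1 take 3 threads each, cores 2 and 3 take 2 each.
      for (int tid = 0; tid < 10; ++tid) {
        int c, t;
        balanced_map(tid, 10, 4, 2, &c, &t);
        std::printf("tid %d -> core %d, slot %d\n", tid, c, t);
      }
      return 0;
    }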
5493 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5498 __kmp_topology->at(coreID * __kmp_nth_per_core + threadID).os_id;
5503 osID = __kmp_topology->at(coreID * __kmp_nth_per_core + i).os_id;
5515 } else { // Non-uniform topology
5517 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5521 __kmp_affinity_find_core_level(__kmp_avail_proc, __kmp_aff_depth - 1);
5523 __kmp_aff_depth - 1, core_level);
5525 __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5531 int osID = __kmp_topology->at(tid).os_id;
5535 __kmp_affinity_find_core(tid, __kmp_aff_depth - 1, core_level);
5537 int osID = __kmp_topology->at(i).os_id;
5538 if (__kmp_affinity_find_core(i, __kmp_aff_depth - 1, core_level) ==
5551 if (procarr[i * nth_per_core + j] != -1) {
5560 if (osID != -1) {
5593 if (procarr[i * nth_per_core + j] != -1) {
5626 if (procarr[i * nth_per_core + k] != -1) {
5629 cnt--;
5630 nth--;
5635 cnt--;
5636 nth--;
5663 if (osID != -1) {
5694 // 4) Use non-OpenMP parallelization
5702 // -1 if we cannot bind thread
5707 // Do not touch non-omp threads
5709 "non-omp thread, returning\n"));
5710 return -1;
5715 return -1;