Lines Matching +full:layer +full:- +full:primary

2  * kmp_dispatch_hier.h -- hierarchical scheduling methods and data structures
5 //===----------------------------------------------------------------------===//
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
18 // Layer type for scheduling hierarchy
20 LAYER_THREAD = -1,
29 // Convert hierarchy type (LAYER_L1, LAYER_L2, etc.) to C-style string
61 void append(enum sched_type sched, kmp_int32 chunk, kmp_hier_layer_e layer) { in append()
76 layers[current_size] = layer; in append()
128 // Sizes of layer arrays bounded by max number of detected L1s, L2s, etc.
156 lb[1 - index] = nlb; in set_next_hand_thread()
157 ub[1 - index] = nub; in set_next_hand_thread()
158 st[1 - index] = nst; in set_next_hand_thread()
159 status[1 - index] = nstatus; in set_next_hand_thread()
162 lb[1 - index] = nlb; in set_next()
163 ub[1 - index] = nub; in set_next()
164 st[1 - index] = nst; in set_next()
165 status[1 - index] = nstatus; in set_next()
166 sh[1 - index].u.s.iteration = 0; in set_next()
170 return status[1 - index]; in get_next_status()
172 T get_next_lb(kmp_uint64 index) const { return lb[1 - index]; } in get_next_lb()
173 T get_next_ub(kmp_uint64 index) const { return ub[1 - index]; } in get_next_ub()
174 ST get_next_st(kmp_uint64 index) const { return st[1 - index]; } in get_next_st()
176 return &(sh[1 - index]); in get_next_sh()
253 tdata->num_active = num_active; in reset_private()
254 tdata->index = 0; in reset_private()
255 tdata->wait_val[0] = tdata->wait_val[1] = get_wait_val(num_active); in reset_private()
260 bdata->val[0] = bdata->val[1] = 0LL; in reset_shared()
261 bdata->status[0] = bdata->status[1] = 0LL; in reset_shared()
267 kmp_uint64 current_index = tdata->index; in barrier()
268 kmp_uint64 next_index = 1 - current_index; in barrier()
269 kmp_uint64 current_wait_value = tdata->wait_val[current_index]; in barrier()
271 (current_wait_value ? 0 : get_wait_val(tdata->num_active)); in barrier()
277 (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v; in barrier()
278 __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value, in barrier()
280 tdata->wait_val[current_index] = next_wait_value; in barrier()
281 tdata->index = next_index; in barrier()
299 tdata->num_active = num_active; in reset_private()
300 tdata->index = 0; in reset_private()
301 tdata->wait_val[0] = tdata->wait_val[1] = (kmp_uint64)num_active; in reset_private()
306 bdata->val[0] = bdata->val[1] = 0LL; in reset_shared()
307 bdata->status[0] = bdata->status[1] = 0LL; in reset_shared()
314 kmp_uint64 current_index = tdata->index; in barrier()
315 kmp_uint64 next_index = 1 - current_index; in barrier()
316 kmp_uint64 current_wait_value = tdata->wait_val[current_index]; in barrier()
317 kmp_uint64 next_wait_value = current_wait_value + tdata->num_active; in barrier()
323 val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index])); in barrier()
325 __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value, in barrier()
327 tdata->wait_val[current_index] = next_wait_value; in barrier()
328 tdata->index = next_index; in barrier()
331 // Data associated with topology unit within a layer
370 tdata->index = 1 - tdata->index; in barrier()
422 return &(hier_parent->hier_pr); in get_parent_pr()
438 // Information regarding a single layer within the scheduling hierarchy
447 // Print this layer's information
463 * layers. Layer 0 is the lowest layer to layer num_layers - 1 which is the
464 * highest layer.
466 * [ 2 ] -> [ L3 | L3 ]
467 * [ 1 ] -> [ L2 | L2 | L2 | L2 ]
468 * [ 0 ] -> [ L1 | L1 | L1 | L1 | L1 | L1 | L1 | L1 ]
470 * each layer
483 auto parent = current->get_parent(); in next_recurse()
484 bool last_layer = (hier_level == get_num_layers() - 1); in next_recurse()
486 kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[hier_level]); in next_recurse()
496 T hier_id = (T)current->get_hier_id(); in next_recurse()
499 KD_TRACE(1, ("kmp_hier_t.next_recurse(): T#%d (%d) is primary of unit\n", in next_recurse()
508 // last layer below the very top uses the single shared buffer in next_recurse()
514 th->th.th_dispatch->th_dispatch_sh_current); in next_recurse()
522 parent->get_curr_sh(th->th.th_hier_bar_data[hier_level + 1].index); in next_recurse()
523 nproc = (T)parent->get_num_active(); in next_recurse()
525 my_pr = current->get_my_pr(); in next_recurse()
538 // layer, attempt to go up the hierarchy for more iterations in next_recurse()
550 &(th->th.th_hier_bar_data[hier_level + 1]); in next_recurse()
551 my_sh = parent->get_curr_sh(upper_tdata->index); in next_recurse()
555 parent->get_curr_lb(upper_tdata->index), in next_recurse()
556 parent->get_curr_ub(upper_tdata->index), in next_recurse()
557 parent->get_curr_st(upper_tdata->index), in next_recurse()
573 current->set_next(my_lb, my_ub, my_st, status, tdata->index); in next_recurse()
580 // last layer or our parent contains the last serial chunk, then the in next_recurse()
582 if (last_layer || parent->hier_pr.flags.contains_last) { in next_recurse()
586 current->hier_pr.flags.contains_last = contains_last; in next_recurse()
588 if (!current->hier_pr.flags.contains_last) in next_recurse()
593 } // if primary thread of this unit in next_recurse()
598 current->barrier(previous_id, tdata); in next_recurse()
601 gtid, hier_level, current->get_curr_status(tdata->index))); in next_recurse()
606 return current->get_curr_status(tdata->index); in next_recurse()
652 // and new_chunks. These should come pre-sorted according to
679 kmp_hier_layer_e layer = new_layers[i]; in allocate_hier() local
681 info[i].type = layer; in allocate_hier()
684 max = __kmp_hier_max_units[layer + 1]; in allocate_hier()
687 KMP_WARNING(HierSchedInvalid, __kmp_get_hier_str(layer)); in allocate_hier()
701 // loc - source file location
702 // gtid - global thread identifier
703 // pr - this thread's private dispatch buffer (corresponding with gtid)
704 // p_last (return value) - pointer to flag indicating this set of iterations
707 // p_lb (return value) - lower bound for this chunk of iterations
708 // p_ub (return value) - upper bound for this chunk of iterations
709 // p_st (return value) - stride for this chunk of iterations
717 kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[0]); in next()
718 auto parent = pr->get_parent(); in next()
723 T nproc = (T)parent->get_num_active(); in next()
724 T unit_id = (T)pr->get_hier_id(); in next()
743 // modified and read by the primary thread on that level. Because of in next()
745 auto sh = &(parent->hier_barrier.sh[0]); in next()
758 __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule, in next()
759 parent->get_next_lb(tdata->index), in next()
760 parent->get_next_ub(tdata->index), in next()
761 parent->get_next_st(tdata->index), in next()
765 pr->u.p.parm1, nproc, unit_id); in next()
766 sh->u.s.iteration = 0; in next()
786 parent->set_next_hand_thread(*p_lb, *p_ub, *p_st, status, tdata->index); in next()
787 } // if primary thread of lowest unit level in next()
788 parent->barrier(pr->get_hier_id(), tdata); in next()
790 *p_lb = parent->get_curr_lb(tdata->index); in next()
791 *p_ub = parent->get_curr_ub(tdata->index); in next()
792 *p_st = parent->get_curr_st(tdata->index); in next()
793 status = parent->get_curr_status(tdata->index); in next()
798 auto sh = parent->get_curr_sh(tdata->index); in next()
815 sh = parent->get_curr_sh(tdata->index); in next()
816 __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule, in next()
817 parent->get_curr_lb(tdata->index), in next()
818 parent->get_curr_ub(tdata->index), in next()
819 parent->get_curr_st(tdata->index), in next()
823 pr->u.p.parm1, nproc, unit_id); in next()
842 if (contains_last && !parent->hier_pr.flags.contains_last) { in next()
854 // These functions probe the layer info structure
895 // Returns the number of threads in the top layer
905 for (int i = num_layers - 1; i >= 0; --i) { in print()
909 for (int i = num_layers - 1; i >= 0; --i) { in print()
910 KD_TRACE(10, ("Layer[%d] =\n", i)); in print()
935 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d called: %d layer(s)\n", in __kmp_dispatch_init_hierarchy()
938 const char *layer = __kmp_get_hier_str(new_layers[i]); in __kmp_dispatch_init_hierarchy() local
941 gtid, i, layer, i, (int)new_scheds[i], i, new_chunks[i])); in __kmp_dispatch_init_hierarchy()
953 team = th->th.th_team; in __kmp_dispatch_init_hierarchy()
954 active = !team->t.t_serialized; in __kmp_dispatch_init_hierarchy()
955 th->th.th_ident = loc; in __kmp_dispatch_init_hierarchy()
957 KMP_DEBUG_ASSERT(th->th.th_dispatch == in __kmp_dispatch_init_hierarchy()
958 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); in __kmp_dispatch_init_hierarchy()
959 my_buffer_index = th->th.th_dispatch->th_disp_index; in __kmp_dispatch_init_hierarchy()
961 &th->th.th_dispatch in __kmp_dispatch_init_hierarchy()
962 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); in __kmp_dispatch_init_hierarchy()
964 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); in __kmp_dispatch_init_hierarchy()
970 pr->flags.use_hier = FALSE; in __kmp_dispatch_init_hierarchy()
971 pr->flags.contains_last = FALSE; in __kmp_dispatch_init_hierarchy()
976 pr->flags.use_hier = TRUE; in __kmp_dispatch_init_hierarchy()
977 pr->u.p.tc = 0; in __kmp_dispatch_init_hierarchy()
978 // Have primary thread allocate the hierarchy in __kmp_dispatch_init_hierarchy()
983 if (sh->hier == NULL) { in __kmp_dispatch_init_hierarchy()
984 sh->hier = (kmp_hier_t<T> *)__kmp_allocate(sizeof(kmp_hier_t<T>)); in __kmp_dispatch_init_hierarchy()
986 sh->hier->allocate_hier(n, new_layers, new_scheds, new_chunks); in __kmp_dispatch_init_hierarchy()
987 sh->u.s.iteration = 0; in __kmp_dispatch_init_hierarchy()
991 kmp_hier_t<T> *hier = sh->hier; in __kmp_dispatch_init_hierarchy()
992 if (!sh->hier->is_valid()) { in __kmp_dispatch_init_hierarchy()
993 pr->flags.use_hier = FALSE; in __kmp_dispatch_init_hierarchy()
996 // Have threads allocate their thread-private barrier data if it hasn't in __kmp_dispatch_init_hierarchy()
998 if (th->th.th_hier_bar_data == NULL) { in __kmp_dispatch_init_hierarchy()
999 th->th.th_hier_bar_data = (kmp_hier_private_bdata_t *)__kmp_allocate( in __kmp_dispatch_init_hierarchy()
1006 int index = __kmp_dispatch_get_index(tid, hier->get_type(i)); in __kmp_dispatch_init_hierarchy()
1007 kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index); in __kmp_dispatch_init_hierarchy()
1010 pr->hier_parent = my_unit; in __kmp_dispatch_init_hierarchy()
1012 if (my_unit->is_active()) { in __kmp_dispatch_init_hierarchy()
1015 gtid, my_unit, my_unit->active)); in __kmp_dispatch_init_hierarchy()
1016 KMP_TEST_THEN_INC32(&(my_unit->active)); in __kmp_dispatch_init_hierarchy()
1020 if (KMP_COMPARE_AND_STORE_ACQ32(&(my_unit->active), 0, 1)) { in __kmp_dispatch_init_hierarchy()
1022 if (i < n - 1) { in __kmp_dispatch_init_hierarchy()
1023 // Setup middle layer pointers to parents in __kmp_dispatch_init_hierarchy()
1024 my_unit->get_my_pr()->hier_id = in __kmp_dispatch_init_hierarchy()
1025 index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i), in __kmp_dispatch_init_hierarchy()
1026 hier->get_type(i + 1)); in __kmp_dispatch_init_hierarchy()
1027 int parent_index = __kmp_dispatch_get_index(tid, hier->get_type(i + 1)); in __kmp_dispatch_init_hierarchy()
1028 my_unit->hier_parent = hier->get_unit(i + 1, parent_index); in __kmp_dispatch_init_hierarchy()
1030 // Setup top layer information (no parent pointers are set) in __kmp_dispatch_init_hierarchy()
1031 my_unit->get_my_pr()->hier_id = in __kmp_dispatch_init_hierarchy()
1032 index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i), in __kmp_dispatch_init_hierarchy()
1034 KMP_TEST_THEN_INC32(&(hier->top_level_nproc)); in __kmp_dispatch_init_hierarchy()
1035 my_unit->hier_parent = nullptr; in __kmp_dispatch_init_hierarchy()
1039 my_unit->get_my_pr()->u.p.tc = 0; in __kmp_dispatch_init_hierarchy()
1040 // Increment this layer's number of active units in __kmp_dispatch_init_hierarchy()
1041 KMP_TEST_THEN_INC32(&(hier->info[i].num_active)); in __kmp_dispatch_init_hierarchy()
1046 KMP_TEST_THEN_INC32(&(my_unit->active)); in __kmp_dispatch_init_hierarchy()
1052 kmp_hier_layer_e::LAYER_THREAD, hier->get_type(0)); in __kmp_dispatch_init_hierarchy()
1053 pr->hier_id = tid % num_threads_per_layer1; in __kmp_dispatch_init_hierarchy()
1057 pr->hier_id += ((tid / num_hw_threads) * num_threads_per_layer1); in __kmp_dispatch_init_hierarchy()
1060 gtid, pr->hier_id)); in __kmp_dispatch_init_hierarchy()
1062 pr->flags.contains_last = FALSE; in __kmp_dispatch_init_hierarchy()
1066 // the barrier data for each unit can be initialized and the last layer's in __kmp_dispatch_init_hierarchy()
1068 int prev_id = pr->get_hier_id(); in __kmp_dispatch_init_hierarchy()
1072 int index = __kmp_dispatch_get_index(tid, hier->get_type(i)); in __kmp_dispatch_init_hierarchy()
1073 kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index); in __kmp_dispatch_init_hierarchy()
1074 // Only primary threads of this unit within the hierarchy do initialization in __kmp_dispatch_init_hierarchy()
1077 my_unit->reset_shared_barrier(); in __kmp_dispatch_init_hierarchy()
1078 my_unit->hier_pr.flags.contains_last = FALSE; in __kmp_dispatch_init_hierarchy()
1079 // Last layer, initialize the private buffers with entire loop information in __kmp_dispatch_init_hierarchy()
1082 if (i == n - 1) { in __kmp_dispatch_init_hierarchy()
1084 loc, gtid, my_unit->get_my_pr(), hier->get_sched(i), lb, ub, st, in __kmp_dispatch_init_hierarchy()
1088 hier->get_chunk(i), hier->get_num_active(i), my_unit->get_hier_id()); in __kmp_dispatch_init_hierarchy()
1090 prev_id = my_unit->get_hier_id(); in __kmp_dispatch_init_hierarchy()
1092 // Initialize each layer of the thread's private barrier data in __kmp_dispatch_init_hierarchy()
1093 kmp_hier_top_unit_t<T> *unit = pr->hier_parent; in __kmp_dispatch_init_hierarchy()
1094 for (int i = 0; i < n && unit; ++i, unit = unit->get_parent()) { in __kmp_dispatch_init_hierarchy()
1095 kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[i]); in __kmp_dispatch_init_hierarchy()
1096 unit->reset_private_barrier(tdata); in __kmp_dispatch_init_hierarchy()
1105 gtid, i, hier->get_num_active(i))); in __kmp_dispatch_init_hierarchy()
1107 hier->print(); in __kmp_dispatch_init_hierarchy()