106c3fb27SDimitry Andric /* 206c3fb27SDimitry Andric * kmp_collapse.h -- header for loop collapse feature 306c3fb27SDimitry Andric */ 406c3fb27SDimitry Andric 506c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 806c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 906c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 1006c3fb27SDimitry Andric // 1106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1206c3fb27SDimitry Andric 1306c3fb27SDimitry Andric #ifndef KMP_COLLAPSE_H 1406c3fb27SDimitry Andric #define KMP_COLLAPSE_H 1506c3fb27SDimitry Andric 1606c3fb27SDimitry Andric #include <type_traits> 1706c3fb27SDimitry Andric 1806c3fb27SDimitry Andric // Type of the index into the loop nest structures 1906c3fb27SDimitry Andric // (with values from 0 to less than n from collapse(n)) 2006c3fb27SDimitry Andric typedef kmp_int32 kmp_index_t; 2106c3fb27SDimitry Andric 2206c3fb27SDimitry Andric // Type for combined loop nest space IV: 2306c3fb27SDimitry Andric typedef kmp_uint64 kmp_loop_nest_iv_t; 2406c3fb27SDimitry Andric 2506c3fb27SDimitry Andric // Loop has <, <=, etc. as a comparison: 2606c3fb27SDimitry Andric enum comparison_t : kmp_int32 { 2706c3fb27SDimitry Andric comp_less_or_eq = 0, 2806c3fb27SDimitry Andric comp_greater_or_eq = 1, 2906c3fb27SDimitry Andric comp_not_eq = 2, 3006c3fb27SDimitry Andric comp_less = 3, 3106c3fb27SDimitry Andric comp_greater = 4 3206c3fb27SDimitry Andric }; 3306c3fb27SDimitry Andric 3406c3fb27SDimitry Andric // Type of loop IV. 3506c3fb27SDimitry Andric // Type of bounds and step, after usual promotions 3606c3fb27SDimitry Andric // are a subset of these types (32 & 64 only): 3706c3fb27SDimitry Andric enum loop_type_t : kmp_int32 { 3806c3fb27SDimitry Andric loop_type_uint8 = 0, 3906c3fb27SDimitry Andric loop_type_int8 = 1, 4006c3fb27SDimitry Andric loop_type_uint16 = 2, 4106c3fb27SDimitry Andric loop_type_int16 = 3, 4206c3fb27SDimitry Andric loop_type_uint32 = 4, 4306c3fb27SDimitry Andric loop_type_int32 = 5, 4406c3fb27SDimitry Andric loop_type_uint64 = 6, 4506c3fb27SDimitry Andric loop_type_int64 = 7 4606c3fb27SDimitry Andric }; 4706c3fb27SDimitry Andric 48*0fca6ea1SDimitry Andric // Defining loop types to handle special cases 49*0fca6ea1SDimitry Andric enum nested_loop_type_t : kmp_int32 { 50*0fca6ea1SDimitry Andric nested_loop_type_unkown = 0, 51*0fca6ea1SDimitry Andric nested_loop_type_lower_triangular_matrix = 1, 52*0fca6ea1SDimitry Andric nested_loop_type_upper_triangular_matrix = 2 53*0fca6ea1SDimitry Andric }; 54*0fca6ea1SDimitry Andric 5506c3fb27SDimitry Andric /*! 5606c3fb27SDimitry Andric @ingroup WORK_SHARING 5706c3fb27SDimitry Andric * Describes the structure for rectangular nested loops. 5806c3fb27SDimitry Andric */ 5906c3fb27SDimitry Andric template <typename T> struct bounds_infoXX_template { 6006c3fb27SDimitry Andric 6106c3fb27SDimitry Andric // typedef typename traits_t<T>::unsigned_t UT; 6206c3fb27SDimitry Andric typedef typename traits_t<T>::signed_t ST; 6306c3fb27SDimitry Andric 6406c3fb27SDimitry Andric loop_type_t loop_type; // The differentiator 6506c3fb27SDimitry Andric loop_type_t loop_iv_type; 6606c3fb27SDimitry Andric comparison_t comparison; 6706c3fb27SDimitry Andric // outer_iv should be 0 (or any other less then number of dimentions) 6806c3fb27SDimitry Andric // if loop doesn't depend on it (lb1 and ub1 will be 0). 6906c3fb27SDimitry Andric // This way we can do multiplication without a check. 7006c3fb27SDimitry Andric kmp_index_t outer_iv; 7106c3fb27SDimitry Andric 7206c3fb27SDimitry Andric // unions to keep the size constant: 7306c3fb27SDimitry Andric union { 7406c3fb27SDimitry Andric T lb0; 7506c3fb27SDimitry Andric kmp_uint64 lb0_u64; // real type can be signed 7606c3fb27SDimitry Andric }; 7706c3fb27SDimitry Andric 7806c3fb27SDimitry Andric union { 7906c3fb27SDimitry Andric T lb1; 8006c3fb27SDimitry Andric kmp_uint64 lb1_u64; // real type can be signed 8106c3fb27SDimitry Andric }; 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric union { 8406c3fb27SDimitry Andric T ub0; 8506c3fb27SDimitry Andric kmp_uint64 ub0_u64; // real type can be signed 8606c3fb27SDimitry Andric }; 8706c3fb27SDimitry Andric 8806c3fb27SDimitry Andric union { 8906c3fb27SDimitry Andric T ub1; 9006c3fb27SDimitry Andric kmp_uint64 ub1_u64; // real type can be signed 9106c3fb27SDimitry Andric }; 9206c3fb27SDimitry Andric 9306c3fb27SDimitry Andric union { 9406c3fb27SDimitry Andric ST step; // signed even if bounds type is unsigned 9506c3fb27SDimitry Andric kmp_int64 step_64; // signed 9606c3fb27SDimitry Andric }; 9706c3fb27SDimitry Andric 9806c3fb27SDimitry Andric kmp_loop_nest_iv_t trip_count; 9906c3fb27SDimitry Andric }; 10006c3fb27SDimitry Andric 10106c3fb27SDimitry Andric /*! 10206c3fb27SDimitry Andric @ingroup WORK_SHARING 10306c3fb27SDimitry Andric * Interface struct for rectangular nested loops. 10406c3fb27SDimitry Andric * Same size as bounds_infoXX_template. 10506c3fb27SDimitry Andric */ 10606c3fb27SDimitry Andric struct bounds_info_t { 10706c3fb27SDimitry Andric 10806c3fb27SDimitry Andric loop_type_t loop_type; // The differentiator 10906c3fb27SDimitry Andric loop_type_t loop_iv_type; 11006c3fb27SDimitry Andric comparison_t comparison; 11106c3fb27SDimitry Andric // outer_iv should be 0 (or any other less then number of dimentions) 11206c3fb27SDimitry Andric // if loop doesn't depend on it (lb1 and ub1 will be 0). 11306c3fb27SDimitry Andric // This way we can do multiplication without a check. 11406c3fb27SDimitry Andric kmp_index_t outer_iv; 11506c3fb27SDimitry Andric 11606c3fb27SDimitry Andric kmp_uint64 lb0_u64; // real type can be signed 11706c3fb27SDimitry Andric kmp_uint64 lb1_u64; // real type can be signed 11806c3fb27SDimitry Andric kmp_uint64 ub0_u64; // real type can be signed 11906c3fb27SDimitry Andric kmp_uint64 ub1_u64; // real type can be signed 12006c3fb27SDimitry Andric kmp_int64 step_64; // signed 12106c3fb27SDimitry Andric 12206c3fb27SDimitry Andric // This is internal, but it's the only internal thing we need 12306c3fb27SDimitry Andric // in rectangular case, so let's expose it here: 12406c3fb27SDimitry Andric kmp_loop_nest_iv_t trip_count; 12506c3fb27SDimitry Andric }; 12606c3fb27SDimitry Andric 12706c3fb27SDimitry Andric //------------------------------------------------------------------------- 12806c3fb27SDimitry Andric // Additional types for internal representation: 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric // Array for a point in the loop space, in the original space. 13106c3fb27SDimitry Andric // It's represented in kmp_uint64, but each dimention is calculated in 13206c3fb27SDimitry Andric // that loop IV type. Also dimentions have to be converted to those types 13306c3fb27SDimitry Andric // when used in generated code. 13406c3fb27SDimitry Andric typedef kmp_uint64 *kmp_point_t; 13506c3fb27SDimitry Andric 13606c3fb27SDimitry Andric // Array: Number of loop iterations on each nesting level to achieve some point, 13706c3fb27SDimitry Andric // in expanded space or in original space. 13806c3fb27SDimitry Andric // OMPTODO: move from using iterations to using offsets (iterations multiplied 13906c3fb27SDimitry Andric // by steps). For those we need to be careful with the types, as step can be 14006c3fb27SDimitry Andric // negative, but it'll remove multiplications and divisions in several places. 14106c3fb27SDimitry Andric typedef kmp_loop_nest_iv_t *kmp_iterations_t; 14206c3fb27SDimitry Andric 14306c3fb27SDimitry Andric // Internal struct with additional info: 14406c3fb27SDimitry Andric template <typename T> struct bounds_info_internalXX_template { 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric // OMPTODO: should span have type T or should it better be 14706c3fb27SDimitry Andric // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than 14806c3fb27SDimitry Andric // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to 14906c3fb27SDimitry Andric // use big_span_t, if it can be resolved at compile time. 15006c3fb27SDimitry Andric typedef 15106c3fb27SDimitry Andric typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64> 15206c3fb27SDimitry Andric big_span_t; 15306c3fb27SDimitry Andric 15406c3fb27SDimitry Andric // typedef typename big_span_t span_t; 15506c3fb27SDimitry Andric typedef T span_t; 15606c3fb27SDimitry Andric 15706c3fb27SDimitry Andric bounds_infoXX_template<T> b; // possibly adjusted bounds 15806c3fb27SDimitry Andric 15906c3fb27SDimitry Andric // Leaving this as a union in case we'll switch to span_t with different sizes 16006c3fb27SDimitry Andric // (depending on T) 16106c3fb27SDimitry Andric union { 16206c3fb27SDimitry Andric // Smallest possible value of iv (may be smaller than actually possible) 16306c3fb27SDimitry Andric span_t span_smallest; 16406c3fb27SDimitry Andric kmp_uint64 span_smallest_u64; 16506c3fb27SDimitry Andric }; 16606c3fb27SDimitry Andric 16706c3fb27SDimitry Andric // Leaving this as a union in case we'll switch to span_t with different sizes 16806c3fb27SDimitry Andric // (depending on T) 16906c3fb27SDimitry Andric union { 17006c3fb27SDimitry Andric // Biggest possible value of iv (may be bigger than actually possible) 17106c3fb27SDimitry Andric span_t span_biggest; 17206c3fb27SDimitry Andric kmp_uint64 span_biggest_u64; 17306c3fb27SDimitry Andric }; 17406c3fb27SDimitry Andric 17506c3fb27SDimitry Andric // Did we adjust loop bounds (not counting canonicalization)? 17606c3fb27SDimitry Andric bool loop_bounds_adjusted; 17706c3fb27SDimitry Andric }; 17806c3fb27SDimitry Andric 17906c3fb27SDimitry Andric // Internal struct with additional info: 18006c3fb27SDimitry Andric struct bounds_info_internal_t { 18106c3fb27SDimitry Andric 18206c3fb27SDimitry Andric bounds_info_t b; // possibly adjusted bounds 18306c3fb27SDimitry Andric 18406c3fb27SDimitry Andric // Smallest possible value of iv (may be smaller than actually possible) 18506c3fb27SDimitry Andric kmp_uint64 span_smallest_u64; 18606c3fb27SDimitry Andric 18706c3fb27SDimitry Andric // Biggest possible value of iv (may be bigger than actually possible) 18806c3fb27SDimitry Andric kmp_uint64 span_biggest_u64; 18906c3fb27SDimitry Andric 19006c3fb27SDimitry Andric // Did we adjust loop bounds (not counting canonicalization)? 19106c3fb27SDimitry Andric bool loop_bounds_adjusted; 19206c3fb27SDimitry Andric }; 19306c3fb27SDimitry Andric 19406c3fb27SDimitry Andric //----------APIs for rectangular loop nests-------------------------------- 19506c3fb27SDimitry Andric 19606c3fb27SDimitry Andric // Canonicalize loop nest and calculate overall trip count. 19706c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread. 19806c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form 19906c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a 20006c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array 20106c3fb27SDimitry Andric // (only trip counts will be calculated). 20206c3fb27SDimitry Andric // Returns trip count of overall space. 20306c3fb27SDimitry Andric extern "C" kmp_loop_nest_iv_t 20406c3fb27SDimitry Andric __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, 20506c3fb27SDimitry Andric /*in/out*/ bounds_info_t *original_bounds_nest, 20606c3fb27SDimitry Andric kmp_index_t n); 20706c3fb27SDimitry Andric 20806c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv. 20906c3fb27SDimitry Andric // Note: original IV will be returned as if it had kmp_uint64 type, 21006c3fb27SDimitry Andric // will have to be converted to original type in user code. 21106c3fb27SDimitry Andric // Note: trip counts should be already calculated by 21206c3fb27SDimitry Andric // __kmpc_process_loop_nest_rectang. 21306c3fb27SDimitry Andric // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline 21406c3fb27SDimitry Andric // that into user code. 21506c3fb27SDimitry Andric extern "C" void 21606c3fb27SDimitry Andric __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, 21706c3fb27SDimitry Andric const bounds_info_t *original_bounds_nest, 21806c3fb27SDimitry Andric /*out*/ kmp_uint64 *original_ivs, 21906c3fb27SDimitry Andric kmp_index_t n); 22006c3fb27SDimitry Andric 22106c3fb27SDimitry Andric //----------Init API for non-rectangular loops-------------------------------- 22206c3fb27SDimitry Andric 22306c3fb27SDimitry Andric // Init API for collapsed loops (static, no chunks defined). 22406c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread. 22506c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form 22606c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a 22706c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array 22806c3fb27SDimitry Andric // (only trip counts will be calculated). Internally API will expand the space 22906c3fb27SDimitry Andric // to parallelogram/parallelepiped, calculate total, calculate bounds for the 23006c3fb27SDimitry Andric // chunks in terms of the new IV, re-calc them in terms of old IVs (especially 23106c3fb27SDimitry Andric // important on the left side, to hit the lower bounds and not step over), and 23206c3fb27SDimitry Andric // pick the correct chunk for this thread (so it will calculate chunks up to the 23306c3fb27SDimitry Andric // needed one). It could be optimized to calculate just this chunk, potentially 23406c3fb27SDimitry Andric // a bit less well distributed among threads. It is designed to make sure that 23506c3fb27SDimitry Andric // threads will receive predictable chunks, deterministically (so that next nest 23606c3fb27SDimitry Andric // of loops with similar characteristics will get exactly same chunks on same 23706c3fb27SDimitry Andric // threads). 23806c3fb27SDimitry Andric // Current contract: chunk_bounds_nest has only lb0 and ub0, 23906c3fb27SDimitry Andric // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). 24006c3fb27SDimitry Andric extern "C" kmp_int32 24106c3fb27SDimitry Andric __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, 24206c3fb27SDimitry Andric /*in/out*/ bounds_info_t *original_bounds_nest, 24306c3fb27SDimitry Andric /*out*/ bounds_info_t *chunk_bounds_nest, 24406c3fb27SDimitry Andric kmp_index_t n, 24506c3fb27SDimitry Andric /*out*/ kmp_int32 *plastiter); 24606c3fb27SDimitry Andric 24706c3fb27SDimitry Andric #endif // KMP_COLLAPSE_H 248