xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_collapse.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric /*
206c3fb27SDimitry Andric  * kmp_collapse.h -- header for loop collapse feature
306c3fb27SDimitry Andric  */
406c3fb27SDimitry Andric 
506c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
806c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
906c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1006c3fb27SDimitry Andric //
1106c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1206c3fb27SDimitry Andric 
1306c3fb27SDimitry Andric #ifndef KMP_COLLAPSE_H
1406c3fb27SDimitry Andric #define KMP_COLLAPSE_H
1506c3fb27SDimitry Andric 
1606c3fb27SDimitry Andric #include <type_traits>
1706c3fb27SDimitry Andric 
1806c3fb27SDimitry Andric // Type of the index into the loop nest structures
1906c3fb27SDimitry Andric // (with values from 0 to less than n from collapse(n))
2006c3fb27SDimitry Andric typedef kmp_int32 kmp_index_t;
2106c3fb27SDimitry Andric 
2206c3fb27SDimitry Andric // Type for combined loop nest space IV:
2306c3fb27SDimitry Andric typedef kmp_uint64 kmp_loop_nest_iv_t;
2406c3fb27SDimitry Andric 
2506c3fb27SDimitry Andric // Loop has <, <=, etc. as a comparison:
2606c3fb27SDimitry Andric enum comparison_t : kmp_int32 {
2706c3fb27SDimitry Andric   comp_less_or_eq = 0,
2806c3fb27SDimitry Andric   comp_greater_or_eq = 1,
2906c3fb27SDimitry Andric   comp_not_eq = 2,
3006c3fb27SDimitry Andric   comp_less = 3,
3106c3fb27SDimitry Andric   comp_greater = 4
3206c3fb27SDimitry Andric };
3306c3fb27SDimitry Andric 
3406c3fb27SDimitry Andric // Type of loop IV.
3506c3fb27SDimitry Andric // Type of bounds and step, after usual promotions
3606c3fb27SDimitry Andric // are a subset of these types (32 & 64 only):
3706c3fb27SDimitry Andric enum loop_type_t : kmp_int32 {
3806c3fb27SDimitry Andric   loop_type_uint8 = 0,
3906c3fb27SDimitry Andric   loop_type_int8 = 1,
4006c3fb27SDimitry Andric   loop_type_uint16 = 2,
4106c3fb27SDimitry Andric   loop_type_int16 = 3,
4206c3fb27SDimitry Andric   loop_type_uint32 = 4,
4306c3fb27SDimitry Andric   loop_type_int32 = 5,
4406c3fb27SDimitry Andric   loop_type_uint64 = 6,
4506c3fb27SDimitry Andric   loop_type_int64 = 7
4606c3fb27SDimitry Andric };
4706c3fb27SDimitry Andric 
48*0fca6ea1SDimitry Andric // Defining loop types to handle special cases
49*0fca6ea1SDimitry Andric enum nested_loop_type_t : kmp_int32 {
50*0fca6ea1SDimitry Andric   nested_loop_type_unkown = 0,
51*0fca6ea1SDimitry Andric   nested_loop_type_lower_triangular_matrix = 1,
52*0fca6ea1SDimitry Andric   nested_loop_type_upper_triangular_matrix = 2
53*0fca6ea1SDimitry Andric };
54*0fca6ea1SDimitry Andric 
5506c3fb27SDimitry Andric /*!
5606c3fb27SDimitry Andric  @ingroup WORK_SHARING
5706c3fb27SDimitry Andric  * Describes the structure for rectangular nested loops.
5806c3fb27SDimitry Andric  */
5906c3fb27SDimitry Andric template <typename T> struct bounds_infoXX_template {
6006c3fb27SDimitry Andric 
6106c3fb27SDimitry Andric   // typedef typename traits_t<T>::unsigned_t UT;
6206c3fb27SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
6306c3fb27SDimitry Andric 
6406c3fb27SDimitry Andric   loop_type_t loop_type; // The differentiator
6506c3fb27SDimitry Andric   loop_type_t loop_iv_type;
6606c3fb27SDimitry Andric   comparison_t comparison;
6706c3fb27SDimitry Andric   // outer_iv should be 0 (or any other less then number of dimentions)
6806c3fb27SDimitry Andric   // if loop doesn't depend on it (lb1 and ub1 will be 0).
6906c3fb27SDimitry Andric   // This way we can do multiplication without a check.
7006c3fb27SDimitry Andric   kmp_index_t outer_iv;
7106c3fb27SDimitry Andric 
7206c3fb27SDimitry Andric   // unions to keep the size constant:
7306c3fb27SDimitry Andric   union {
7406c3fb27SDimitry Andric     T lb0;
7506c3fb27SDimitry Andric     kmp_uint64 lb0_u64; // real type can be signed
7606c3fb27SDimitry Andric   };
7706c3fb27SDimitry Andric 
7806c3fb27SDimitry Andric   union {
7906c3fb27SDimitry Andric     T lb1;
8006c3fb27SDimitry Andric     kmp_uint64 lb1_u64; // real type can be signed
8106c3fb27SDimitry Andric   };
8206c3fb27SDimitry Andric 
8306c3fb27SDimitry Andric   union {
8406c3fb27SDimitry Andric     T ub0;
8506c3fb27SDimitry Andric     kmp_uint64 ub0_u64; // real type can be signed
8606c3fb27SDimitry Andric   };
8706c3fb27SDimitry Andric 
8806c3fb27SDimitry Andric   union {
8906c3fb27SDimitry Andric     T ub1;
9006c3fb27SDimitry Andric     kmp_uint64 ub1_u64; // real type can be signed
9106c3fb27SDimitry Andric   };
9206c3fb27SDimitry Andric 
9306c3fb27SDimitry Andric   union {
9406c3fb27SDimitry Andric     ST step; // signed even if bounds type is unsigned
9506c3fb27SDimitry Andric     kmp_int64 step_64; // signed
9606c3fb27SDimitry Andric   };
9706c3fb27SDimitry Andric 
9806c3fb27SDimitry Andric   kmp_loop_nest_iv_t trip_count;
9906c3fb27SDimitry Andric };
10006c3fb27SDimitry Andric 
10106c3fb27SDimitry Andric /*!
10206c3fb27SDimitry Andric  @ingroup WORK_SHARING
10306c3fb27SDimitry Andric  * Interface struct for rectangular nested loops.
10406c3fb27SDimitry Andric  * Same size as bounds_infoXX_template.
10506c3fb27SDimitry Andric  */
10606c3fb27SDimitry Andric struct bounds_info_t {
10706c3fb27SDimitry Andric 
10806c3fb27SDimitry Andric   loop_type_t loop_type; // The differentiator
10906c3fb27SDimitry Andric   loop_type_t loop_iv_type;
11006c3fb27SDimitry Andric   comparison_t comparison;
11106c3fb27SDimitry Andric   // outer_iv should be 0  (or any other less then number of dimentions)
11206c3fb27SDimitry Andric   // if loop doesn't depend on it (lb1 and ub1 will be 0).
11306c3fb27SDimitry Andric   // This way we can do multiplication without a check.
11406c3fb27SDimitry Andric   kmp_index_t outer_iv;
11506c3fb27SDimitry Andric 
11606c3fb27SDimitry Andric   kmp_uint64 lb0_u64; // real type can be signed
11706c3fb27SDimitry Andric   kmp_uint64 lb1_u64; // real type can be signed
11806c3fb27SDimitry Andric   kmp_uint64 ub0_u64; // real type can be signed
11906c3fb27SDimitry Andric   kmp_uint64 ub1_u64; // real type can be signed
12006c3fb27SDimitry Andric   kmp_int64 step_64; // signed
12106c3fb27SDimitry Andric 
12206c3fb27SDimitry Andric   // This is internal, but it's the only internal thing we need
12306c3fb27SDimitry Andric   // in rectangular case, so let's expose it here:
12406c3fb27SDimitry Andric   kmp_loop_nest_iv_t trip_count;
12506c3fb27SDimitry Andric };
12606c3fb27SDimitry Andric 
12706c3fb27SDimitry Andric //-------------------------------------------------------------------------
12806c3fb27SDimitry Andric // Additional types for internal representation:
12906c3fb27SDimitry Andric 
13006c3fb27SDimitry Andric // Array for a point in the loop space, in the original space.
13106c3fb27SDimitry Andric // It's represented in kmp_uint64, but each dimention is calculated in
13206c3fb27SDimitry Andric // that loop IV type. Also dimentions have to be converted to those types
13306c3fb27SDimitry Andric // when used in generated code.
13406c3fb27SDimitry Andric typedef kmp_uint64 *kmp_point_t;
13506c3fb27SDimitry Andric 
13606c3fb27SDimitry Andric // Array: Number of loop iterations on each nesting level to achieve some point,
13706c3fb27SDimitry Andric // in expanded space or in original space.
13806c3fb27SDimitry Andric // OMPTODO: move from using iterations to using offsets (iterations multiplied
13906c3fb27SDimitry Andric // by steps). For those we need to be careful with the types, as step can be
14006c3fb27SDimitry Andric // negative, but it'll remove multiplications and divisions in several places.
14106c3fb27SDimitry Andric typedef kmp_loop_nest_iv_t *kmp_iterations_t;
14206c3fb27SDimitry Andric 
14306c3fb27SDimitry Andric // Internal struct with additional info:
14406c3fb27SDimitry Andric template <typename T> struct bounds_info_internalXX_template {
14506c3fb27SDimitry Andric 
14606c3fb27SDimitry Andric   // OMPTODO: should span have type T or should it better be
14706c3fb27SDimitry Andric   // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
14806c3fb27SDimitry Andric   // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
14906c3fb27SDimitry Andric   // use big_span_t, if it can be resolved at compile time.
15006c3fb27SDimitry Andric   typedef
15106c3fb27SDimitry Andric       typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
15206c3fb27SDimitry Andric           big_span_t;
15306c3fb27SDimitry Andric 
15406c3fb27SDimitry Andric   // typedef typename big_span_t span_t;
15506c3fb27SDimitry Andric   typedef T span_t;
15606c3fb27SDimitry Andric 
15706c3fb27SDimitry Andric   bounds_infoXX_template<T> b; // possibly adjusted bounds
15806c3fb27SDimitry Andric 
15906c3fb27SDimitry Andric   // Leaving this as a union in case we'll switch to span_t with different sizes
16006c3fb27SDimitry Andric   // (depending on T)
16106c3fb27SDimitry Andric   union {
16206c3fb27SDimitry Andric     // Smallest possible value of iv (may be smaller than actually possible)
16306c3fb27SDimitry Andric     span_t span_smallest;
16406c3fb27SDimitry Andric     kmp_uint64 span_smallest_u64;
16506c3fb27SDimitry Andric   };
16606c3fb27SDimitry Andric 
16706c3fb27SDimitry Andric   // Leaving this as a union in case we'll switch to span_t with different sizes
16806c3fb27SDimitry Andric   // (depending on T)
16906c3fb27SDimitry Andric   union {
17006c3fb27SDimitry Andric     // Biggest possible value of iv (may be bigger than actually possible)
17106c3fb27SDimitry Andric     span_t span_biggest;
17206c3fb27SDimitry Andric     kmp_uint64 span_biggest_u64;
17306c3fb27SDimitry Andric   };
17406c3fb27SDimitry Andric 
17506c3fb27SDimitry Andric   // Did we adjust loop bounds (not counting canonicalization)?
17606c3fb27SDimitry Andric   bool loop_bounds_adjusted;
17706c3fb27SDimitry Andric };
17806c3fb27SDimitry Andric 
17906c3fb27SDimitry Andric // Internal struct with additional info:
18006c3fb27SDimitry Andric struct bounds_info_internal_t {
18106c3fb27SDimitry Andric 
18206c3fb27SDimitry Andric   bounds_info_t b; // possibly adjusted bounds
18306c3fb27SDimitry Andric 
18406c3fb27SDimitry Andric   // Smallest possible value of iv (may be smaller than actually possible)
18506c3fb27SDimitry Andric   kmp_uint64 span_smallest_u64;
18606c3fb27SDimitry Andric 
18706c3fb27SDimitry Andric   // Biggest possible value of iv (may be bigger than actually possible)
18806c3fb27SDimitry Andric   kmp_uint64 span_biggest_u64;
18906c3fb27SDimitry Andric 
19006c3fb27SDimitry Andric   // Did we adjust loop bounds (not counting canonicalization)?
19106c3fb27SDimitry Andric   bool loop_bounds_adjusted;
19206c3fb27SDimitry Andric };
19306c3fb27SDimitry Andric 
19406c3fb27SDimitry Andric //----------APIs for rectangular loop nests--------------------------------
19506c3fb27SDimitry Andric 
19606c3fb27SDimitry Andric // Canonicalize loop nest and calculate overall trip count.
19706c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread.
19806c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form
19906c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a
20006c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array
20106c3fb27SDimitry Andric // (only trip counts will be calculated).
20206c3fb27SDimitry Andric // Returns trip count of overall space.
20306c3fb27SDimitry Andric extern "C" kmp_loop_nest_iv_t
20406c3fb27SDimitry Andric __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
20506c3fb27SDimitry Andric                                  /*in/out*/ bounds_info_t *original_bounds_nest,
20606c3fb27SDimitry Andric                                  kmp_index_t n);
20706c3fb27SDimitry Andric 
20806c3fb27SDimitry Andric // Calculate old induction variables corresponding to overall new_iv.
20906c3fb27SDimitry Andric // Note: original IV will be returned as if it had kmp_uint64 type,
21006c3fb27SDimitry Andric // will have to be converted to original type in user code.
21106c3fb27SDimitry Andric // Note: trip counts should be already calculated by
21206c3fb27SDimitry Andric // __kmpc_process_loop_nest_rectang.
21306c3fb27SDimitry Andric // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
21406c3fb27SDimitry Andric // that into user code.
21506c3fb27SDimitry Andric extern "C" void
21606c3fb27SDimitry Andric __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
21706c3fb27SDimitry Andric                                  const bounds_info_t *original_bounds_nest,
21806c3fb27SDimitry Andric                                  /*out*/ kmp_uint64 *original_ivs,
21906c3fb27SDimitry Andric                                  kmp_index_t n);
22006c3fb27SDimitry Andric 
22106c3fb27SDimitry Andric //----------Init API for non-rectangular loops--------------------------------
22206c3fb27SDimitry Andric 
22306c3fb27SDimitry Andric // Init API for collapsed loops (static, no chunks defined).
22406c3fb27SDimitry Andric // "bounds_nest" has to be allocated per thread.
22506c3fb27SDimitry Andric // API will modify original bounds_nest array to bring it to a canonical form
22606c3fb27SDimitry Andric // (only <= and >=, no !=, <, >). If the original loop nest was already in a
22706c3fb27SDimitry Andric // canonical form there will be no changes to bounds in bounds_nest array
22806c3fb27SDimitry Andric // (only trip counts will be calculated). Internally API will expand the space
22906c3fb27SDimitry Andric // to parallelogram/parallelepiped, calculate total, calculate bounds for the
23006c3fb27SDimitry Andric // chunks in terms of the new IV, re-calc them in terms of old IVs (especially
23106c3fb27SDimitry Andric // important on the left side, to hit the lower bounds and not step over), and
23206c3fb27SDimitry Andric // pick the correct chunk for this thread (so it will calculate chunks up to the
23306c3fb27SDimitry Andric // needed one). It could be optimized to calculate just this chunk, potentially
23406c3fb27SDimitry Andric // a bit less well distributed among threads. It is designed to make sure that
23506c3fb27SDimitry Andric // threads will receive predictable chunks, deterministically (so that next nest
23606c3fb27SDimitry Andric // of loops with similar characteristics will get exactly same chunks on same
23706c3fb27SDimitry Andric // threads).
23806c3fb27SDimitry Andric // Current contract: chunk_bounds_nest has only lb0 and ub0,
23906c3fb27SDimitry Andric // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
24006c3fb27SDimitry Andric extern "C" kmp_int32
24106c3fb27SDimitry Andric __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
24206c3fb27SDimitry Andric                           /*in/out*/ bounds_info_t *original_bounds_nest,
24306c3fb27SDimitry Andric                           /*out*/ bounds_info_t *chunk_bounds_nest,
24406c3fb27SDimitry Andric                           kmp_index_t n,
24506c3fb27SDimitry Andric                           /*out*/ kmp_int32 *plastiter);
24606c3fb27SDimitry Andric 
24706c3fb27SDimitry Andric #endif // KMP_COLLAPSE_H
248