// xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/include/ompx.h.var (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __OMPX_H
#define __OMPX_H

/// Warp size: the AMDGCN wavefront size when the compiler defines it, and 32
/// otherwise.
#if defined(__AMDGCN_WAVEFRONT_SIZE)
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#else
#define __WARP_SIZE 32
#endif

/// 64-bit unsigned integer used for the `mask` parameters and ballot results.
/// Prefer the compiler's own 64-bit type so that the typedef stays 64 bits
/// wide where `unsigned long` is only 32 bits, and so that it agrees with
/// <stdint.h> on targets that define uint64_t as `unsigned long long`.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
typedef unsigned long uint64_t;
#endif

#ifdef __cplusplus
extern "C" {
#endif

/// OpenMP runtime routines called by the host fallbacks of ompx_thread_id and
/// ompx_block_dim.
int omp_get_ancestor_thread_num(int level);
int omp_get_team_size(int level);

#ifdef __cplusplus
}
#endif

/// Target kernel language extensions
///
/// These extensions exist for the host to allow fallback implementations;
/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
/// the kernel language are followed, the host fallbacks should behave as
/// expected since the kernel is represented as three sequential outer loops,
/// one for each grid dimension, and three (nested) parallel loops, one for each
/// block dimension. This fallback is not supposed to be optimal and should be
/// configurable by the user.
///
///{

// Give the declarations that follow C linkage when compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

/// Memory orderings for the `Ordering` argument of the ompx synchronization
/// routines. Each value is the compiler's corresponding __ATOMIC_* constant.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  ompx_acquire = __ATOMIC_ACQUIRE,
  /// Misspelled alias of ompx_acquire, kept so existing code still compiles.
  ompx_aquire = ompx_acquire,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};

/// Dimension selectors for the `Dim` argument of the grid query routines.
enum {
  ompx_dim_x = 0,
  ompx_dim_y = 1,
  ompx_dim_z = 2,
};

// TODO: The following implementation is for host fallback. We need to disable
// generation of host fallback in kernel language mode.
#pragma omp begin declare variant match(device = {kind(cpu)})

/// ompx_{thread,block}_{id,dim}
///{
/// Defines the host fallback `int ompx_<NAME>(int Dim)`, which returns VALUE.
/// VALUE is an expression that may refer to `Dim`.
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(NAME, VALUE)                     \
  static inline int ompx_##NAME(int Dim) { return VALUE; }

// Thread id and block size are queried from the OpenMP runtime at nesting
// level Dim + 1.
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id,
                                      omp_get_ancestor_thread_num(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, omp_get_team_size(Dim + 1))
// The host fallback always reports block id 0 and a grid dimension of 1.
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
///}

785f757f3fSDimitry Andric/// ompx_{sync_block}_{,divergent}
795f757f3fSDimitry Andric///{
805f757f3fSDimitry Andric#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY)         \
815f757f3fSDimitry Andric  static inline RETTY ompx_##NAME(ARGS) { BODY; }
825f757f3fSDimitry Andric
835f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
84*0fca6ea1SDimitry Andric                                      _Pragma("omp barrier"))
855f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
86*0fca6ea1SDimitry Andric                                      ompx_sync_block(ompx_acq_rel))
875f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
88*0fca6ea1SDimitry Andric                                      ompx_sync_block(Ordering))
895f757f3fSDimitry Andric#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
905f757f3fSDimitry Andric///}
915f757f3fSDimitry Andric
/// Host fallback for ompx_ballot_sync. There is no host implementation, so any
/// call traps; the arguments are not used.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  (void)mask;
  (void)pred;
  __builtin_trap();
}

/// ompx_shfl_down_sync_{i,f,l,d}
///{
/// Defines the host fallback `TYPE ompx_shfl_down_sync_<TY>(...)`. There is no
/// host implementation, so any call traps; the arguments are not used.
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY)                \
  static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var,         \
                                              unsigned delta, int width) {     \
    (void)mask;                                                                \
    (void)var;                                                                 \
    (void)delta;                                                               \
    (void)width;                                                               \
    __builtin_trap();                                                          \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
///}

#pragma omp end declare variant

/// ompx_{sync_block}_{,divergent}
///{
/// Declares `RETTY ompx_<NAME>(ARGS)` without a body.
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS)                    \
  RETTY ompx_##NAME(ARGS);

_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
///}

1255f757f3fSDimitry Andric/// ompx_{thread,block}_{id,dim}_{x,y,z}
1265f757f3fSDimitry Andric///{
1275f757f3fSDimitry Andric#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME)                                 \
1285f757f3fSDimitry Andric  int ompx_##NAME(int Dim);                                                    \
1295f757f3fSDimitry Andric  static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); }      \
1305f757f3fSDimitry Andric  static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); }      \
1315f757f3fSDimitry Andric  static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); }
1325f757f3fSDimitry Andric
1335f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
1345f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
1355f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
1365f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
1375f757f3fSDimitry Andric#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
1385f757f3fSDimitry Andric///}
1395f757f3fSDimitry Andric
/// Declaration of ompx_ballot_sync; takes a 64-bit `mask` and an integer
/// predicate and returns a 64-bit value.
uint64_t ompx_ballot_sync(uint64_t mask, int pred);

/// ompx_shfl_down_sync_{i,f,l,d}
///{
/// Declares `TYPE ompx_shfl_down_sync_<TY>(mask, var, delta, width)` for one
/// value type.
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
  TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta,       \
                                int width);

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

// End of the C-linkage declarations.
#ifdef __cplusplus
}
#endif

1605f757f3fSDimitry Andric#ifdef __cplusplus
1615f757f3fSDimitry Andric
1625f757f3fSDimitry Andricnamespace ompx {
1635f757f3fSDimitry Andric
1645f757f3fSDimitry Andricenum {
1655f757f3fSDimitry Andric  dim_x = ompx_dim_x,
1665f757f3fSDimitry Andric  dim_y = ompx_dim_y,
1675f757f3fSDimitry Andric  dim_z = ompx_dim_z,
1685f757f3fSDimitry Andric};
1695f757f3fSDimitry Andric
1705f757f3fSDimitry Andricenum {
1715f757f3fSDimitry Andric  relaxed = ompx_relaxed ,
1725f757f3fSDimitry Andric  aquire = ompx_aquire,
1735f757f3fSDimitry Andric  release = ompx_release,
1745f757f3fSDimitry Andric  acc_rel = ompx_acq_rel,
1755f757f3fSDimitry Andric  seq_cst = ompx_seq_cst,
1765f757f3fSDimitry Andric};
1775f757f3fSDimitry Andric
1785f757f3fSDimitry Andric/// ompx::{thread,block}_{id,dim}_{,x,y,z}
1795f757f3fSDimitry Andric///{
1805f757f3fSDimitry Andric#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME)                          \
1815f757f3fSDimitry Andric  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); }        \
1825f757f3fSDimitry Andric  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); }           \
1835f757f3fSDimitry Andric  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); }           \
1845f757f3fSDimitry Andric  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }
1855f757f3fSDimitry Andric
1865f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
1875f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
1885f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
1895f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
1905f757f3fSDimitry Andric#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
1915f757f3fSDimitry Andric///}
1925f757f3fSDimitry Andric
1935f757f3fSDimitry Andric/// ompx_{sync_block}_{,divergent}
1945f757f3fSDimitry Andric///{
1955f757f3fSDimitry Andric#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS)  \
1965f757f3fSDimitry Andric  static inline RETTY NAME(ARGS) {               \
1975f757f3fSDimitry Andric    return ompx_##NAME(CALL_ARGS);                                             \
1985f757f3fSDimitry Andric  }
1995f757f3fSDimitry Andric
2005f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
201*0fca6ea1SDimitry Andric                                        Ordering)
2025f757f3fSDimitry Andric_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
203*0fca6ea1SDimitry Andric                                        int Ordering = acc_rel, Ordering)
2045f757f3fSDimitry Andric#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
2055f757f3fSDimitry Andric///}
2065f757f3fSDimitry Andric
207*0fca6ea1SDimitry Andricstatic inline uint64_t ballot_sync(uint64_t mask, int pred) {
208*0fca6ea1SDimitry Andric  return ompx_ballot_sync(mask, pred);
209*0fca6ea1SDimitry Andric}
210*0fca6ea1SDimitry Andric
211*0fca6ea1SDimitry Andric/// shfl_down_sync
212*0fca6ea1SDimitry Andric///{
213*0fca6ea1SDimitry Andric#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
214*0fca6ea1SDimitry Andric  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta,   \
215*0fca6ea1SDimitry Andric                                    int width = __WARP_SIZE) {                 \
216*0fca6ea1SDimitry Andric    return ompx_shfl_down_sync_##TY(mask, var, delta, width);                  \
217*0fca6ea1SDimitry Andric  }
218*0fca6ea1SDimitry Andric
219*0fca6ea1SDimitry Andric_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220*0fca6ea1SDimitry Andric_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221*0fca6ea1SDimitry Andric_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222*0fca6ea1SDimitry Andric_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
223*0fca6ea1SDimitry Andric
224*0fca6ea1SDimitry Andric#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
225*0fca6ea1SDimitry Andric///}
226*0fca6ea1SDimitry Andric
2275f757f3fSDimitry Andric} // namespace ompx
2285f757f3fSDimitry Andric#endif
2295f757f3fSDimitry Andric
///}

#endif /* __OMPX_H */