//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Kernel-language style extensions (`ompx_*` in C, `ompx::*` in C++).
//
// The header provides, in this order:
//   1. host (cpu) fallback definitions, wrapped in a `declare variant` region,
//   2. plain declarations of the same entry points,
//   3. `_x/_y/_z` convenience helpers, and
//   4. thin C++ wrappers in namespace `ompx`.
//
//===----------------------------------------------------------------------===//

#ifndef __OMPX_H
#define __OMPX_H

// Use the standard exact-width type. A hand-written
// `typedef unsigned long uint64_t;` is only 32 bits wide on ILP32/LLP64
// targets and conflicts with <stdint.h> wherever uint64_t is
// `unsigned long long`.
#include <stdint.h>

/// Default `width` of the C++ `ompx::shfl_down_sync` wrappers: the AMDGCN
/// wavefront size when the compiler predefines it, 32 otherwise.
#ifdef __AMDGCN_WAVEFRONT_SIZE
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#else
#define __WARP_SIZE 32
#endif

#ifdef __cplusplus
extern "C" {
#endif

// OpenMP runtime queries used by the host fallbacks below.
int omp_get_ancestor_thread_num(int);
int omp_get_team_size(int);

#ifdef __cplusplus
}
#endif

/// Target kernel language extensions
///
/// These extensions exist for the host to allow fallback implementations,
/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
/// the kernel language are followed, the host fallbacks should behave as
/// expected since the kernel is represented as three sequential outer loops,
/// one for each grid dimension, and three (nested) parallel loops, one for
/// each block dimension. This fallback is not supposed to be optimal and
/// should be configurable by the user.
///
///{

#ifdef __cplusplus
extern "C" {
#endif

/// Memory orderings accepted by the `ompx_sync_block*` functions. The values
/// are the compiler's `__ATOMIC_*` constants.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  /// Historical (misspelled) name, kept for source compatibility.
  ompx_aquire = __ATOMIC_ACQUIRE,
  /// Correctly spelled alias of `ompx_aquire`.
  ompx_acquire = __ATOMIC_ACQUIRE,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};

/// Dimension selectors for the `ompx_{thread,block}_{id,dim}` and
/// `ompx_grid_dim` queries.
enum {
  ompx_dim_x = 0,
  ompx_dim_y = 1,
  ompx_dim_z = 2,
};

// TODO: The following implementation is for host fallback. We need to disable
// generation of host fallback in kernel language mode.
#pragma omp begin declare variant match(device = {kind(cpu)})

/// ompx_{thread,block}_{id,dim}
///
/// Host fallbacks: the thread id and block dimension are answered by the
/// OpenMP runtime at nesting level `Dim + 1`; the block id is always 0 and the
/// grid dimension is always 1.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(NAME, VALUE)                     \
  static inline int ompx_##NAME(int Dim) { return VALUE; }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id,
                                      omp_get_ancestor_thread_num(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, omp_get_team_size(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
///}

/// ompx_sync_block{,_acq_rel,_divergent}
///
/// Host fallbacks: `ompx_sync_block` executes an `omp barrier` and does not
/// use its `Ordering` argument; the other two forward to `ompx_sync_block`.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY)         \
  static inline RETTY ompx_##NAME(ARGS) { BODY; }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
                                      _Pragma("omp barrier"))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
                                      ompx_sync_block(ompx_acq_rel))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
                                      ompx_sync_block(Ordering))
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
///}

/// Host fallback for `ompx_ballot_sync`: there is no host implementation, so
/// the call traps and never returns.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  __builtin_trap();
}

/// ompx_shfl_down_sync_{i,f,l,d}
///
/// Host fallbacks: like `ompx_ballot_sync`, every variant traps.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY)                \
  static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var,         \
                                              unsigned delta, int width) {     \
    __builtin_trap();                                                          \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
///}

#pragma omp end declare variant

// Everything below is declared outside the cpu variant region. The definitions
// of the plain declarations are not part of this header.
// NOTE(review): presumably the device runtime supplies them - confirm.

/// ompx_sync_block{,_acq_rel,_divergent}
///{
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS)                    \
  RETTY ompx_##NAME(ARGS);

_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
///}

/// ompx_{thread,block}_{id,dim}_{x,y,z}
///
/// Declares `ompx_NAME(int Dim)` and defines the three per-dimension helpers
/// on top of it. The helpers take `(void)` rather than `()` so that they are
/// proper prototypes in C as well as in C++.
///{
#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME)                                 \
  int ompx_##NAME(int Dim);                                                    \
  static inline int ompx_##NAME##_x(void) { return ompx_##NAME(ompx_dim_x); }  \
  static inline int ompx_##NAME##_y(void) { return ompx_##NAME(ompx_dim_y); }  \
  static inline int ompx_##NAME##_z(void) { return ompx_##NAME(ompx_dim_z); }

_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
///}

uint64_t ompx_ballot_sync(uint64_t mask, int pred);

/// ompx_shfl_down_sync_{i,f,l,d}
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
  TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta,       \
                                int width);

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

#ifdef __cplusplus
}
#endif

#ifdef __cplusplus

namespace ompx {

/// C++ spellings of the `ompx_dim_*` selectors.
enum {
  dim_x = ompx_dim_x,
  dim_y = ompx_dim_y,
  dim_z = ompx_dim_z,
};

/// C++ spellings of the `ompx_*` memory orderings. `aquire` and `acc_rel` are
/// the historical names and are kept for source compatibility; `acquire` and
/// `acq_rel` are aliases with the same values.
enum {
  relaxed = ompx_relaxed,
  aquire = ompx_aquire,
  acquire = ompx_acquire,
  release = ompx_release,
  acc_rel = ompx_acq_rel,
  acq_rel = ompx_acq_rel,
  seq_cst = ompx_seq_cst,
};

/// ompx::{thread,block}_{id,dim}_{,x,y,z}
///
/// Each wrapper forwards to the C function of the same name.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME)                          \
  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); }        \
  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); }           \
  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); }           \
  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
///}

/// ompx::sync_block{,_divergent}
///
/// Same as the C functions, with the ordering defaulting to acquire-release.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS)  \
  static inline RETTY NAME(ARGS) {                                             \
    return ompx_##NAME(CALL_ARGS);                                             \
  }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block,
                                        int Ordering = acq_rel, Ordering)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
                                        int Ordering = acq_rel, Ordering)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
///}

/// Forwards to `ompx_ballot_sync`.
static inline uint64_t ballot_sync(uint64_t mask, int pred) {
  return ompx_ballot_sync(mask, pred);
}

/// ompx::shfl_down_sync
///
/// One overload per supported element type, each forwarding to the matching
/// `ompx_shfl_down_sync_{i,f,l,d}`; `width` defaults to `__WARP_SIZE`.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta,   \
                                    int width = __WARP_SIZE) {                 \
    return ompx_shfl_down_sync_##TY(mask, var, delta, width);                  \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

} // namespace ompx
#endif

///}

#endif /* __OMPX_H */