1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "DeviceUtils.h" 13 14 #include "Debug.h" 15 #include "Interface.h" 16 #include "Mapping.h" 17 18 #pragma omp begin declare target device_type(nohost) 19 20 using namespace ompx; 21 22 namespace impl { 23 24 bool isSharedMemPtr(const void *Ptr) { return false; } 25 26 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 27 static_assert(sizeof(unsigned long) == 8, ""); 28 *LowBits = static_cast<uint32_t>(Val & 0x00000000FFFFFFFFUL); 29 *HighBits = static_cast<uint32_t>((Val & 0xFFFFFFFF00000000UL) >> 32); 30 } 31 32 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 33 return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits; 34 } 35 36 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width); 37 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, 38 int32_t Width); 39 40 uint64_t ballotSync(uint64_t Mask, int32_t Pred); 41 42 /// AMDGCN Implementation 43 /// 44 ///{ 45 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 46 47 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) { 48 int Self = mapping::getThreadIdInWarp(); 49 int Index = SrcLane + (Self & ~(Width - 1)); 50 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 51 } 52 53 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, 54 int32_t Width) { 55 int Self = mapping::getThreadIdInWarp(); 56 int Index = Self + LaneDelta; 57 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index; 58 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 59 } 60 61 uint64_t ballotSync(uint64_t Mask, int32_t Pred) { 62 return Mask & __builtin_amdgcn_ballot_w64(Pred); 63 } 64 65 bool isSharedMemPtr(const void *Ptr) { 66 return __builtin_amdgcn_is_shared( 67 (const __attribute__((address_space(0))) void *)Ptr); 68 } 69 #pragma omp end declare variant 70 ///} 71 72 /// NVPTX Implementation 73 /// 74 ///{ 75 #pragma omp begin declare variant match( \ 76 device = {arch(nvptx, nvptx64)}, \ 77 implementation = {extension(match_any)}) 78 79 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) { 80 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, Width - 1); 81 } 82 83 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { 84 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f; 85 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); 86 } 87 88 uint64_t ballotSync(uint64_t Mask, int32_t Pred) { 89 return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred); 90 } 91 92 bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); } 93 94 #pragma omp end declare variant 95 ///} 96 } // namespace impl 97 98 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { 99 return impl::Pack(LowBits, HighBits); 100 } 101 102 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { 103 impl::Unpack(Val, &LowBits, &HighBits); 104 } 105 106 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, 107 int32_t Width) { 108 return impl::shuffle(Mask, Var, SrcLane, Width); 109 } 110 111 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, 112 int32_t Width) { 113 return impl::shuffleDown(Mask, Var, Delta, Width); 114 } 115 116 int64_t utils::shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, 117 int32_t Width) { 118 uint32_t Lo, Hi; 119 utils::unpack(Var, Lo, Hi); 120 Hi = impl::shuffleDown(Mask, Hi, Delta, Width); 121 Lo = impl::shuffleDown(Mask, Lo, Delta, Width); 122 return utils::pack(Lo, Hi); 123 } 124 125 uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) { 126 return impl::ballotSync(Mask, Pred); 127 } 128 129 bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); } 130 131 extern "C" { 132 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { 133 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); 134 } 135 136 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { 137 return utils::shuffleDown(lanes::All, Val, Delta, Width); 138 } 139 } 140 141 #pragma omp end declare target 142