xref: /llvm-project/offload/DeviceRTL/src/DeviceUtils.cpp (revision 08533a3ee8f3a09a59cf6ac3be59198b26b7f739)
1 //===-- DeviceUtils.cpp - OpenMP device runtime utility functions - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "DeviceUtils.h"
13 
14 #include "Debug.h"
15 #include "Interface.h"
16 #include "Mapping.h"
17 
18 #pragma omp begin declare target device_type(nohost)
19 
20 using namespace ompx;
21 
22 namespace impl {
23 
/// Generic fallback: unconditionally reports that \p Ptr is not a shared
/// memory pointer. The architecture variants further down in this file supply
/// their own definitions.
bool isSharedMemPtr(const void *Ptr) {
  // The pointer is intentionally not inspected in the fallback.
  (void)Ptr;
  return false;
}
25 
/// Split the 64-bit value \p Val into two 32-bit halves.
///
/// \param Val      The value to split.
/// \param LowBits  Receives bits [0, 32) of \p Val. Must be a valid pointer.
/// \param HighBits Receives bits [32, 64) of \p Val. Must be a valid pointer.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // Work on the uint64_t directly instead of masking with `unsigned long`
  // literals; this yields the same halves without requiring that
  // `unsigned long` is 64 bits wide.
  *LowBits = static_cast<uint32_t>(Val);        // Truncation keeps the low half.
  *HighBits = static_cast<uint32_t>(Val >> 32); // Shift brings the high half down.
}
31 
/// Combine two 32-bit halves into one 64-bit value: \p HighBits ends up in
/// bits [32, 64) and \p LowBits in bits [0, 32).
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the high half is not shifted out of a 32-bit type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  const uint64_t Lower = static_cast<uint64_t>(LowBits);
  return Upper | Lower;
}
35 
/// Target-specific primitives. They are only declared here; the definitions
/// come from the per-architecture `declare variant` regions below.

/// Shuffle \p Var using \p SrcLane as the source lane and \p Width as the
/// segment width.
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);

/// Shuffle \p Var from the lane \p LaneDelta positions further down, with
/// \p Width as the segment width.
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
                    int32_t Width);

/// Ballot on \p Pred; the result is returned as a 64-bit lane mask.
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
41 
42 /// AMDGCN Implementation
43 ///
44 ///{
45 #pragma omp begin declare variant match(device = {arch(amdgcn)})
46 
47 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
48   int Self = mapping::getThreadIdInWarp();
49   int Index = SrcLane + (Self & ~(Width - 1));
50   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
51 }
52 
53 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
54                     int32_t Width) {
55   int Self = mapping::getThreadIdInWarp();
56   int Index = Self + LaneDelta;
57   Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
58   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
59 }
60 
61 uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
62   return Mask & __builtin_amdgcn_ballot_w64(Pred);
63 }
64 
65 bool isSharedMemPtr(const void *Ptr) {
66   return __builtin_amdgcn_is_shared(
67       (const __attribute__((address_space(0))) void *)Ptr);
68 }
69 #pragma omp end declare variant
70 ///}
71 
72 /// NVPTX Implementation
73 ///
74 ///{
75 #pragma omp begin declare variant match(                                       \
76         device = {arch(nvptx, nvptx64)},                                       \
77             implementation = {extension(match_any)})
78 
79 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
80   return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, Width - 1);
81 }
82 
83 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
84   int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
85   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
86 }
87 
88 uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
89   return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
90 }
91 
92 bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
93 
94 #pragma omp end declare variant
95 ///}
96 } // namespace impl
97 
98 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
99   return impl::Pack(LowBits, HighBits);
100 }
101 
102 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
103   impl::Unpack(Val, &LowBits, &HighBits);
104 }
105 
106 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
107                        int32_t Width) {
108   return impl::shuffle(Mask, Var, SrcLane, Width);
109 }
110 
111 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
112                            int32_t Width) {
113   return impl::shuffleDown(Mask, Var, Delta, Width);
114 }
115 
116 int64_t utils::shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
117                            int32_t Width) {
118   uint32_t Lo, Hi;
119   utils::unpack(Var, Lo, Hi);
120   Hi = impl::shuffleDown(Mask, Hi, Delta, Width);
121   Lo = impl::shuffleDown(Mask, Lo, Delta, Width);
122   return utils::pack(Lo, Hi);
123 }
124 
125 uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) {
126   return impl::ballotSync(Mask, Pred);
127 }
128 
129 bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }
130 
131 extern "C" {
132 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
133   return impl::shuffleDown(lanes::All, Val, Delta, SrcLane);
134 }
135 
136 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
137   return utils::shuffleDown(lanes::All, Val, Delta, Width);
138 }
139 }
140 
141 #pragma omp end declare target
142