xref: /llvm-project/libc/shared/rpc_util.h (revision f855ceeefc97220a052cc76a52a45c6907eac1f8)
1 //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SHARED_RPC_UTIL_H
10 #define LLVM_LIBC_SHARED_RPC_UTIL_H
11 
12 #include <stddef.h>
13 #include <stdint.h>
14 
15 #if (defined(__NVPTX__) || defined(__AMDGPU__)) &&                             \
16     !((defined(__CUDA__) && !defined(__CUDA_ARCH__)) ||                        \
17       (defined(__HIP__) && !defined(__HIP_DEVICE_COMPILE__)))
18 #include <gpuintrin.h>
19 #define RPC_TARGET_IS_GPU
20 #endif
21 
22 // Workaround for missing __has_builtin in < GCC 10.
23 #ifndef __has_builtin
24 #define __has_builtin(x) 0
25 #endif
26 
27 #ifndef RPC_ATTRS
28 #if defined(__CUDA__) || defined(__HIP__)
29 #define RPC_ATTRS __attribute__((host, device)) inline
30 #else
31 #define RPC_ATTRS inline
32 #endif
33 #endif
34 
35 namespace rpc {
36 
/// Identity metafunction: maps T to itself (freestanding analogue of
/// C++20 std::type_identity). Used as a base class by the reference traits
/// below so they only have to spell the mapping once.
template <typename T> struct type_identity {
  using type = T;
};
40 
/// Compile-time constant wrapper, a freestanding analogue of
/// std::integral_constant<T, v>.
template <class T, T v> struct type_constant {
  // A static constexpr data member is implicitly inline in C++17, so no
  // explicit `inline` is required for a single definition across TUs.
  static constexpr T value = v;
};
44 
/// Strips a single level of lvalue or rvalue reference from T
/// (freestanding analogue of std::remove_reference).
template <class T> struct remove_reference {
  using type = T;
};
template <class T> struct remove_reference<T &> {
  using type = T;
};
template <class T> struct remove_reference<T &&> {
  using type = T;
};
48 
/// Detects a top-level const qualifier on T (freestanding analogue of
/// std::is_const).
template <class T> struct is_const {
  static constexpr bool value = false;
};
template <class T> struct is_const<const T> {
  static constexpr bool value = true;
};
51 
52 /// Freestanding implementation of std::move.
53 template <class T>
54 RPC_ATTRS constexpr typename remove_reference<T>::type &&move(T &&t) {
55   return static_cast<typename remove_reference<T>::type &&>(t);
56 }
57 
/// Freestanding implementation of std::forward.
///
/// The lvalue overload forwards lvalues as lvalues when T is an lvalue
/// reference (reference collapsing) and as rvalues otherwise.
template <typename T>
RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &value) {
  return static_cast<T &&>(value);
}
/// The rvalue overload forwards rvalues onward as rvalues.
template <typename T>
RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &&value) {
  return static_cast<T &&>(value);
}
67 
/// Tag type used to request in-place construction of a contained value
/// (freestanding analogue of std::in_place_t).
struct in_place_t {
  RPC_ATTRS explicit in_place_t() = default;
};
71 
/// Tag type denoting an empty optional (freestanding analogue of
/// std::nullopt_t).
struct nullopt_t {
  RPC_ATTRS constexpr explicit nullopt_t() = default;
};
75 
// Tag values used to disambiguate the optional constructors below.
constexpr inline in_place_t in_place{};
constexpr inline nullopt_t nullopt{};
78 
/// Freestanding and minimal implementation of std::optional.
template <typename T> class optional {
  // Storage holds either nothing (the `empty` member) or a constructed U.
  // The union defers construction of U until a value is actually provided,
  // and `in_use` records which union member is currently active.
  template <typename U> struct OptionalStorage {
    union {
      char empty;
      U stored_value;
    };

    bool in_use = false;

    // Destroy the contained value (if any) when the storage dies.
    RPC_ATTRS ~OptionalStorage() { reset(); }

    // Empty state: activate the dummy `empty` member, no U is constructed.
    RPC_ATTRS constexpr OptionalStorage() : empty() {}

    // Construct the U in place from the given arguments. Note this does not
    // set `in_use`; the optional constructors below do that themselves.
    template <typename... Args>
    RPC_ATTRS constexpr explicit OptionalStorage(in_place_t, Args &&...args)
        : stored_value(forward<Args>(args)...) {}

    // Manually end the lifetime of the stored value, if one exists.
    RPC_ATTRS constexpr void reset() {
      if (in_use)
        stored_value.~U();
      in_use = false;
    }
  };

  OptionalStorage<T> storage;

public:
  RPC_ATTRS constexpr optional() = default;
  RPC_ATTRS constexpr optional(nullopt_t) {}

  RPC_ATTRS constexpr optional(const T &t) : storage(in_place, t) {
    storage.in_use = true;
  }
  // NOTE(review): the defaulted copy/move operations copy the raw storage;
  // for a union with a non-trivially-copyable member the compiler deletes
  // them, so these defaults presumably assume trivially-copyable T — verify
  // against the instantiations used by the RPC interface.
  RPC_ATTRS constexpr optional(const optional &) = default;

  RPC_ATTRS constexpr optional(T &&t) : storage(in_place, move(t)) {
    storage.in_use = true;
  }
  RPC_ATTRS constexpr optional(optional &&O) = default;

  // NOTE(review): this assigns a T to the OptionalStorage object itself
  // (there is no such assignment operator declared above) and never updates
  // `in_use`; it looks like it only compiles because member functions of a
  // class template are instantiated lazily — confirm before relying on
  // value assignment.
  RPC_ATTRS constexpr optional &operator=(T &&t) {
    storage = move(t);
    return *this;
  }
  RPC_ATTRS constexpr optional &operator=(optional &&) = default;

  // NOTE(review): same caveat as the move-assignment from T above.
  RPC_ATTRS constexpr optional &operator=(const T &t) {
    storage = t;
    return *this;
  }
  RPC_ATTRS constexpr optional &operator=(const optional &) = default;

  // Destroy the contained value and return to the empty state.
  RPC_ATTRS constexpr void reset() { storage.reset(); }

  // Unchecked access: calling value()/operator*/operator-> on an empty
  // optional reads an inactive union member, matching std::optional's
  // undefined behavior in that case.
  RPC_ATTRS constexpr const T &value() const & { return storage.stored_value; }

  RPC_ATTRS constexpr T &value() & { return storage.stored_value; }

  RPC_ATTRS constexpr explicit operator bool() const { return storage.in_use; }
  RPC_ATTRS constexpr bool has_value() const { return storage.in_use; }
  RPC_ATTRS constexpr const T *operator->() const {
    return &storage.stored_value;
  }
  RPC_ATTRS constexpr T *operator->() { return &storage.stored_value; }
  RPC_ATTRS constexpr const T &operator*() const & {
    return storage.stored_value;
  }
  RPC_ATTRS constexpr T &operator*() & { return storage.stored_value; }

  // Rvalue access moves the stored value out; the optional still reports
  // has_value() afterwards (the moved-from T remains alive until reset).
  RPC_ATTRS constexpr T &&value() && { return move(storage.stored_value); }
  RPC_ATTRS constexpr T &&operator*() && { return move(storage.stored_value); }
};
152 
/// Suspend the thread briefly to assist the thread scheduler during busy loops.
RPC_ATTRS void sleep_briefly() {
#if __has_builtin(__nvvm_reflect)
  // NVPTX: nanosleep only exists on sm_70 and newer; __nvvm_reflect resolves
  // the architecture check when the IR is finalized for the target.
  if (__nvvm_reflect("__CUDA_ARCH") >= 700)
    asm("nanosleep.u32 64;" ::: "memory");
#elif __has_builtin(__builtin_amdgcn_s_sleep)
  // AMDGPU: s_sleep suspends the wave for a small fixed number of cycles.
  __builtin_amdgcn_s_sleep(2);
#elif __has_builtin(__builtin_ia32_pause)
  // x86: PAUSE hints to the core that this is a spin-wait loop.
  __builtin_ia32_pause();
#elif __has_builtin(__builtin_arm_isb)
  // ARM/AArch64: full-system instruction barrier used as a scheduling hint.
  __builtin_arm_isb(0xf);
#else
  // Simply do nothing if sleeping isn't supported on this platform.
#endif
}
168 
/// Conditional to indicate if this process is running on the GPU.
/// Evaluated at compile time, so it can steer `if constexpr` dispatch.
RPC_ATTRS constexpr bool is_process_gpu() {
#ifdef RPC_TARGET_IS_GPU
  return true;
#else
  return false;
#endif
}
177 
/// Wait for all lanes in the group to complete.
/// A no-op on the CPU, where the "group" is a single thread.
RPC_ATTRS void sync_lane([[maybe_unused]] uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_sync_lane(lane_mask);
#endif
}
184 
/// Copies the value from the first active thread to the rest.
/// On the CPU there is only one lane, so \p x is returned unchanged.
RPC_ATTRS uint32_t broadcast_value([[maybe_unused]] uint64_t lane_mask,
                                   uint32_t x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_read_first_lane_u32(lane_mask, x);
#else
  return x;
#endif
}
194 
/// Returns the number of lanes that participate in the RPC interface
/// (the SIMT group width on the GPU, a single lane on the CPU).
RPC_ATTRS uint32_t get_num_lanes() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_num_lanes();
#else
  return 1;
#endif
}
203 
/// Returns a bitmask of the lanes in the warp / wavefront that are currently
/// executing together. (The original comment described the lane *id*; this
/// function returns the execution mask.) On the CPU the single lane yields 1.
RPC_ATTRS uint64_t get_lane_mask() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_mask();
#else
  return 1;
#endif
}
212 
/// Returns the id of this thread inside the warp / wavefront executing
/// together. Always 0 on the CPU, where there is a single lane.
RPC_ATTRS uint32_t get_lane_id() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_id();
#else
  return 0;
#endif
}
221 
/// Conditional that is only true for a single thread in a lane.
/// On the CPU every thread is its own lane, so this is always true.
RPC_ATTRS bool is_first_lane([[maybe_unused]] uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_is_first_in_lane(lane_mask);
#else
  return true;
#endif
}
230 
/// Returns a bitmask of threads in the current lane for which \p x is true.
/// On the CPU the single lane yields 1 or 0 (bool promoted to uint64_t).
RPC_ATTRS uint64_t ballot([[maybe_unused]] uint64_t lane_mask, bool x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_ballot(lane_mask, x);
#else
  return x;
#endif
}
239 
240 /// Return \p val aligned "upwards" according to \p align.
241 template <typename V, typename A>
242 RPC_ATTRS constexpr V align_up(V val, A align) {
243   return ((val + V(align) - 1) / V(align)) * V(align);
244 }
245 
246 /// Utility to provide a unified interface between the CPU and GPU's memory
247 /// model. On the GPU stack variables are always private to a lane so we can
248 /// simply use the variable passed in. On the CPU we need to allocate enough
249 /// space for the whole lane and index into it.
250 template <typename V> RPC_ATTRS V &lane_value(V *val, uint32_t id) {
251   if constexpr (is_process_gpu())
252     return *val;
253   return val[id];
254 }
255 
256 /// Advance the \p p by \p bytes.
257 template <typename T, typename U> RPC_ATTRS T *advance(T *ptr, U bytes) {
258   if constexpr (is_const<T>::value)
259     return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
260                                  bytes);
261   else
262     return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
263 }
264 
/// Wrapper around the optimal memory copy implementation for the target.
/// Uses the compiler builtin so no libc symbol is required; \p dst and
/// \p src must not overlap (standard memcpy semantics).
RPC_ATTRS void rpc_memcpy(void *dst, const void *src, size_t count) {
  __builtin_memcpy(dst, src, count);
}
269 
270 template <class T> RPC_ATTRS constexpr const T &max(const T &a, const T &b) {
271   return (a < b) ? b : a;
272 }
273 
274 } // namespace rpc
275 
276 #endif // LLVM_LIBC_SHARED_RPC_UTIL_H
277