//===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#ifndef OMPTARGET_STATE_H
#define OMPTARGET_STATE_H

#include "Shared/Environment.h"

#include "Debug.h"
#include "DeviceTypes.h"
#include "DeviceUtils.h"
#include "Mapping.h"

// Forward declaration.
struct KernelEnvironmentTy;

#pragma omp begin declare target device_type(nohost)

namespace ompx {

namespace memory {

/// Alloca \p Size bytes in shared memory, if possible, for \p Reason.
///
/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
void *allocShared(uint64_t Size, const char *Reason);

/// Free \p Ptr, allocated via allocShared, for \p Reason.
///
/// Note: See the restrictions on __kmpc_free_shared for proper usage.
void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);

/// Alloca \p Size bytes in global memory, if possible, for \p Reason.
void *allocGlobal(uint64_t Size, const char *Reason);

/// Return a pointer to the dynamic shared memory buffer.
void *getDynamicBuffer();

/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
void freeGlobal(void *Ptr, const char *Reason);

} // namespace memory

namespace state {

/// Size of the per-team scratchpad; the value is injected at compile time via
/// the SHARED_SCRATCHPAD_SIZE macro.
inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;

/// Storage for the OpenMP internal control variables (ICVs) of one data
/// environment. The member layout (including the explicit padding slot) is
/// part of the runtime ABI — do not reorder fields.
struct ICVStateTy {
  uint32_t NThreadsVar;
  uint32_t LevelVar;
  uint32_t ActiveLevelVar;
  // Explicit padding to keep the layout stable; not an ICV.
  uint32_t Padding0Val;
  uint32_t MaxActiveLevelsVar;
  uint32_t RunSchedVar;
  uint32_t RunSchedChunkVar;

  /// Member-wise equality of all ICV values.
  bool operator==(const ICVStateTy &Other) const;

  /// Assert (in debug builds) that all ICV values match \p Other.
  void assertEqual(const ICVStateTy &Other) const;
};

/// Team-wide state shared by all threads of a team; lives in team-local
/// (pteam) memory, see the allocate pragma on TeamState below.
struct TeamStateTy {
  /// Initialize the team state for an SPMD or generic (\p IsSPMD) kernel.
  void init(bool IsSPMD);

  bool operator==(const TeamStateTy &) const;

  void assertEqual(TeamStateTy &Other) const;

  /// ICVs
  ///
  /// Preallocated storage for ICV values that are used if the threads have not
  /// set a custom default. The latter is supported but unlikely and slow(er).
  ///
  ///{
  ICVStateTy ICVState;
  ///}

  /// Number of threads participating in the current parallel region.
  uint32_t ParallelTeamSize;
  /// Non-zero once any thread of the team has allocated a private
  /// ThreadStateTy (checked on the ICV lookup fast path).
  uint32_t HasThreadState;
  /// Outlined function to execute for the next parallel region.
  ParallelRegionFnTy ParallelRegionFnVar;
};

extern TeamStateTy TeamState;
#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)

/// Per-thread state, allocated lazily (in global memory) only when a thread
/// actually diverges from the team-wide defaults.
struct ThreadStateTy {

  /// ICVs have preallocated storage in the TeamStateTy which is used if a
  /// thread has not set a custom value. The latter is supported but unlikely.
  /// When it happens we will allocate dynamic memory to hold the values of all
  /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
  /// ICV struct to hold them all. This is slower than alternatives but allows
  /// users to pay only for what they use.
  ///
  state::ICVStateTy ICVState;

  /// Previous state in the per-thread data-environment chain; restored when
  /// the current data environment is exited.
  ThreadStateTy *PreviousThreadState;

  /// Initialize from the team-wide ICV defaults, with no predecessor.
  void init() {
    ICVState = TeamState.ICVState;
    PreviousThreadState = nullptr;
  }

  /// Initialize by inheriting the ICVs of \p PreviousTS (or the team defaults
  /// if \p PreviousTS is null) and link it as the predecessor.
  void init(ThreadStateTy *PreviousTS) {
    ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
    PreviousThreadState = PreviousTS;
  }
};

/// One (lazily allocated) ThreadStateTy pointer per thread in the block.
extern ThreadStateTy **ThreadStates;
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)

/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
          KernelLaunchEnvironmentTy &KernelLaunchEnvironment);

/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
/// holds for all instances of the kernel. The latter is dynamic and provides
/// per-launch information.
KernelEnvironmentTy &getKernelEnvironment();
KernelLaunchEnvironmentTy &getKernelLaunchEnvironment();

/// Keys identifying the state/ICV values accessible through the lookup
/// helpers below. Entries above the separator are 32-bit ICVs stored in
/// ICVStateTy; the ones below address other team-state members.
enum ValueKind {
  VK_NThreads,
  VK_Level,
  VK_ActiveLevel,
  VK_MaxActiveLevels,
  VK_RunSched,
  // ---
  VK_RunSchedChunk,
  VK_ParallelRegionFn,
  VK_ParallelTeamSize,
  VK_HasThreadState,
};

/// Enter a new per-thread data environment (implementation not visible here;
/// presumably pushes a ThreadStateTy onto the PreviousThreadState chain).
void enterDataEnvironment(IdentTy *Ident);

/// Exit the innermost per-thread data environment entered via
/// enterDataEnvironment.
void exitDataEnvironment();

/// RAII helper that enters a data environment on construction and exits it on
/// destruction. NOTE(review): "Date" looks like a typo for "Data"; kept as-is
/// since renaming would break existing users.
struct DateEnvironmentRAII {
  DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
  ~DateEnvironmentRAII() { exitDataEnvironment(); }
};

/// Reset the state associated with thread \p TId (implementation not visible
/// in this header).
void resetStateForThread(uint32_t TId);

// FIXME: https://github.com/llvm/llvm-project/issues/123241.
// Look up \p Member for modification. Fast path: when thread states are
// forced off, disabled by config, or unused by this team, return a reference
// into the team-wide ICV storage. Slow path: lazily allocate a ThreadStateTy
// for this thread in global memory, initialize it from the team defaults, and
// return a reference into the thread-private copy.
// NOTE: the macro body embeds `return` statements that execute in the
// *enclosing* function (lookup32) — it is not an expression.
// FIXME: https://github.com/llvm/llvm-project/issues/123241.
#define lookupForModify32Impl(Member, Ident, ForceTeamState)                   \
  {                                                                            \
    if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||          \
                   !TeamState.HasThreadState))                                 \
      return TeamState.ICVState.Member;                                        \
    uint32_t TId = mapping::getThreadIdInBlock();                              \
    if (OMP_UNLIKELY(!ThreadStates[TId])) {                                    \
      ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(                   \
          memory::allocGlobal(sizeof(ThreadStateTy),                           \
                              "ICV modification outside data environment"));   \
      ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!");     \
      TeamState.HasThreadState = true;                                         \
      ThreadStates[TId]->init();                                               \
    }                                                                          \
    return ThreadStates[TId]->ICVState.Member;                                 \
  }

// Read-only lookup of \p Member: prefer the thread-private copy if this
// thread has one, otherwise fall back to the team-wide storage. Like the
// macro above, it `return`s from the enclosing function.
// FIXME: https://github.com/llvm/llvm-project/issues/123241.
#define lookupImpl(Member, ForceTeamState)                                     \
  {                                                                            \
    auto TId = mapping::getThreadIdInBlock();                                  \
    if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() &&        \
                     TeamState.HasThreadState && ThreadStates[TId]))           \
      return ThreadStates[TId]->ICVState.Member;                               \
    return TeamState.ICVState.Member;                                          \
  }

/// Return a reference to the 32-bit state value identified by \p Kind.
/// \p IsReadonly selects the cheap read path (lookupImpl); otherwise the
/// modify path (lookupForModify32Impl) may allocate a per-thread state.
/// \p ForceTeamState forces the team-wide storage regardless of thread state.
/// Note: the macro invocations contain the `return`s, so the apparent case
/// fall-through is never reached; unknown kinds are unreachable.
[[gnu::always_inline, gnu::flatten]] inline uint32_t &
lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
  switch (Kind) {
  case state::VK_NThreads:
    if (IsReadonly)
      lookupImpl(NThreadsVar, ForceTeamState);
    lookupForModify32Impl(NThreadsVar, Ident, ForceTeamState);
  case state::VK_Level:
    if (IsReadonly)
      lookupImpl(LevelVar, ForceTeamState);
    lookupForModify32Impl(LevelVar, Ident, ForceTeamState);
  case state::VK_ActiveLevel:
    if (IsReadonly)
      lookupImpl(ActiveLevelVar, ForceTeamState);
    lookupForModify32Impl(ActiveLevelVar, Ident, ForceTeamState);
  case state::VK_MaxActiveLevels:
    if (IsReadonly)
      lookupImpl(MaxActiveLevelsVar, ForceTeamState);
    lookupForModify32Impl(MaxActiveLevelsVar, Ident, ForceTeamState);
  case state::VK_RunSched:
    if (IsReadonly)
      lookupImpl(RunSchedVar, ForceTeamState);
    lookupForModify32Impl(RunSchedVar, Ident, ForceTeamState);
  case state::VK_RunSchedChunk:
    if (IsReadonly)
      lookupImpl(RunSchedChunkVar, ForceTeamState);
    lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState);
  case state::VK_ParallelTeamSize:
    return TeamState.ParallelTeamSize;
  case state::VK_HasThreadState:
    return TeamState.HasThreadState;
  default:
    break;
  }
  __builtin_unreachable();
}

/// Return a reference to the pointer-sized state value identified by
/// \p Kind. Only VK_ParallelRegionFn is pointer-sized today; it always lives
/// in the team state, so \p IsReadonly and \p ForceTeamState are unused.
[[gnu::always_inline, gnu::flatten]] inline void *&
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
  switch (Kind) {
  case state::VK_ParallelRegionFn:
    return TeamState.ParallelRegionFnVar;
  default:
    break;
  }
  __builtin_unreachable();
}

/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
template <typename Ty, ValueKind Kind> struct Value {
  /// Read the current value (read-only lookup; never allocates thread state).
  [[gnu::flatten, gnu::always_inline]] operator Ty() {
    return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
                  /*ForceTeamState=*/false);
  }

  /// Write \p Other (modify lookup; may allocate thread state).
  [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) {
    set(Other, /*IdentTy=*/nullptr);
    return *this;
  }

  [[gnu::flatten, gnu::always_inline]] Value &operator++() {
    inc(1, /*IdentTy=*/nullptr);
    return *this;
  }

  [[gnu::flatten, gnu::always_inline]] Value &operator--() {
    inc(-1, /*IdentTy=*/nullptr);
    return *this;
  }

  /// Assert (in debug builds) that the current value equals \p V.
  [[gnu::flatten, gnu::always_inline]] void
  assert_eq(const Ty &V, IdentTy *Ident = nullptr,
            bool ForceTeamState = false) {
    ASSERT(lookup(/*IsReadonly=*/true, Ident, ForceTeamState) == V, nullptr);
  }

private:
  [[gnu::flatten, gnu::always_inline]] Ty &
  lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
    Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
    return t;
  }

  [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) {
    return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) +=
            UpdateVal);
  }

  [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) {
    return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) =
                UpdateVal);
  }

  template <typename VTy, typename Ty2> friend struct ValueRAII;
};

/// A lookup class without actual state used to provide
/// a nice interface to lookup and update ICV values
/// we can declare in global scope. Pointer-typed counterpart of Value,
/// backed by lookupPtr instead of lookup32.
template <typename Ty, ValueKind Kind> struct PtrValue {
  [[gnu::flatten, gnu::always_inline]] operator Ty() {
    return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
                  /*ForceTeamState=*/false);
  }

  [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) {
    set(Other);
    return *this;
  }

private:
  Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
    return lookupPtr(Kind, IsReadonly, ForceTeamState);
  }

  Ty &set(Ty UpdateVal) {
    return (lookup(/*IsReadonly=*/false, /*IdentTy=*/nullptr,
                   /*ForceTeamState=*/false) = UpdateVal);
  }

  template <typename VTy, typename Ty2> friend struct ValueRAII;
};

/// RAII guard that, when \p Active, swaps a state value \p V from \p OldValue
/// to \p NewValue for the guard's lifetime and restores \p OldValue on
/// destruction. When inactive, no lookup is performed and Ptr is left
/// pointing at utils::UndefPtr (never dereferenced).
template <typename VTy, typename Ty> struct ValueRAII {
  ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
            bool ForceTeamState = false)
      : Ptr(Active ? &V.lookup(/*IsReadonly=*/false, Ident, ForceTeamState)
                   : (Ty *)utils::UndefPtr),
        Val(OldValue), Active(Active) {
    if (!Active)
      return;
    ASSERT(*Ptr == OldValue, "ValueRAII initialization with wrong old value!");
    *Ptr = NewValue;
  }
  ~ValueRAII() {
    if (Active)
      *Ptr = Val;
  }

private:
  Ty *Ptr;
  Ty Val;
  bool Active;
};

/// The run-sched-chunk value (VK_RunSchedChunk).
inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;

/// The parallel team size (VK_ParallelTeamSize, team state only).
inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;

/// Flag indicating some thread allocated private state (VK_HasThreadState).
inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;

/// The outlined function for the next parallel region
/// (VK_ParallelRegionFn, team state only).
inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
    ParallelRegionFn;

/// Run \p Func and check the state is unchanged afterwards (debugging aid;
/// implementation not visible here).
void runAndCheckState(void(Func(void)));

/// Communicate to the optimizer that the state is in its initial
/// (SPMD or generic, \p IsSPMD) configuration.
void assumeInitialState(bool IsSPMD);

/// Return the value of the ParallelTeamSize ICV.
int getEffectivePTeamSize();

} // namespace state

namespace icv {

/// The nthreads-var ICV (VK_NThreads).
inline state::Value<uint32_t, state::VK_NThreads> NThreads;

/// The levels-var ICV (VK_Level), counting nested parallel regions.
inline state::Value<uint32_t, state::VK_Level> Level;

/// The `active-level` describes which of the parallel level counted with the
/// `level-var` is active. There can only be one.
///
/// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0.
inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;

/// The max-active-levels-var ICV (VK_MaxActiveLevels).
inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels;

/// The run-sched-var ICV (VK_RunSched).
inline state::Value<uint32_t, state::VK_RunSched> RunSched;

} // namespace icv

} // namespace ompx

#pragma omp end declare target

#endif