1330d8983SJohannes Doerfert //===- PluginInterface.h - Target independent plugin device interface -----===// 2330d8983SJohannes Doerfert // 3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information. 5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6330d8983SJohannes Doerfert // 7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===// 8330d8983SJohannes Doerfert // 9330d8983SJohannes Doerfert //===----------------------------------------------------------------------===// 10330d8983SJohannes Doerfert 11330d8983SJohannes Doerfert #ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H 12330d8983SJohannes Doerfert #define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H 13330d8983SJohannes Doerfert 14330d8983SJohannes Doerfert #include <cstddef> 15330d8983SJohannes Doerfert #include <cstdint> 16330d8983SJohannes Doerfert #include <deque> 17330d8983SJohannes Doerfert #include <list> 18330d8983SJohannes Doerfert #include <map> 19330d8983SJohannes Doerfert #include <shared_mutex> 20330d8983SJohannes Doerfert #include <vector> 21330d8983SJohannes Doerfert 22c95abe94SJohannes Doerfert #include "ExclusiveAccess.h" 2354b5c76dSJohannes Doerfert #include "Shared/APITypes.h" 24330d8983SJohannes Doerfert #include "Shared/Debug.h" 25330d8983SJohannes Doerfert #include "Shared/Environment.h" 26330d8983SJohannes Doerfert #include "Shared/EnvironmentVar.h" 27330d8983SJohannes Doerfert #include "Shared/Requirements.h" 28330d8983SJohannes Doerfert #include "Shared/Utils.h" 29330d8983SJohannes Doerfert 30330d8983SJohannes Doerfert #include "GlobalHandler.h" 31330d8983SJohannes Doerfert #include "JIT.h" 32330d8983SJohannes Doerfert #include "MemoryManager.h" 33330d8983SJohannes Doerfert #include "RPC.h" 34330d8983SJohannes Doerfert 
#include "omptarget.h" 35330d8983SJohannes Doerfert 36330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT 37330d8983SJohannes Doerfert #include "omp-tools.h" 38330d8983SJohannes Doerfert #endif 39330d8983SJohannes Doerfert 40330d8983SJohannes Doerfert #include "llvm/ADT/SmallVector.h" 41330d8983SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h" 42330d8983SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPGridValues.h" 43330d8983SJohannes Doerfert #include "llvm/Support/Allocator.h" 44330d8983SJohannes Doerfert #include "llvm/Support/Error.h" 45330d8983SJohannes Doerfert #include "llvm/Support/ErrorHandling.h" 46330d8983SJohannes Doerfert #include "llvm/Support/MemoryBufferRef.h" 47330d8983SJohannes Doerfert #include "llvm/Support/raw_ostream.h" 48330d8983SJohannes Doerfert #include "llvm/TargetParser/Triple.h" 49330d8983SJohannes Doerfert 50330d8983SJohannes Doerfert namespace llvm { 51330d8983SJohannes Doerfert namespace omp { 52330d8983SJohannes Doerfert namespace target { 53330d8983SJohannes Doerfert 54330d8983SJohannes Doerfert namespace plugin { 55330d8983SJohannes Doerfert 56330d8983SJohannes Doerfert struct GenericPluginTy; 57330d8983SJohannes Doerfert struct GenericKernelTy; 58330d8983SJohannes Doerfert struct GenericDeviceTy; 59f42f57b5SJoseph Huber struct RecordReplayTy; 60330d8983SJohannes Doerfert 61330d8983SJohannes Doerfert /// Class that wraps the __tgt_async_info to simply its usage. In case the 62330d8983SJohannes Doerfert /// object is constructed without a valid __tgt_async_info, the object will use 63330d8983SJohannes Doerfert /// an internal one and will synchronize the current thread with the pending 64330d8983SJohannes Doerfert /// operations when calling AsyncInfoWrapperTy::finalize(). This latter function 65330d8983SJohannes Doerfert /// must be called before destroying the wrapper object. 
66330d8983SJohannes Doerfert struct AsyncInfoWrapperTy { 67330d8983SJohannes Doerfert AsyncInfoWrapperTy(GenericDeviceTy &Device, __tgt_async_info *AsyncInfoPtr); 68330d8983SJohannes Doerfert 69330d8983SJohannes Doerfert ~AsyncInfoWrapperTy() { 70330d8983SJohannes Doerfert assert(!AsyncInfoPtr && "AsyncInfoWrapperTy not finalized"); 71330d8983SJohannes Doerfert } 72330d8983SJohannes Doerfert 73330d8983SJohannes Doerfert /// Get the raw __tgt_async_info pointer. 74330d8983SJohannes Doerfert operator __tgt_async_info *() const { return AsyncInfoPtr; } 75330d8983SJohannes Doerfert 76330d8983SJohannes Doerfert /// Indicate whether there is queue. 77330d8983SJohannes Doerfert bool hasQueue() const { return (AsyncInfoPtr->Queue != nullptr); } 78330d8983SJohannes Doerfert 79330d8983SJohannes Doerfert /// Get the queue. 80330d8983SJohannes Doerfert template <typename Ty> Ty getQueueAs() { 81330d8983SJohannes Doerfert static_assert(sizeof(Ty) == sizeof(AsyncInfoPtr->Queue), 82330d8983SJohannes Doerfert "Queue is not of the same size as target type"); 83330d8983SJohannes Doerfert return static_cast<Ty>(AsyncInfoPtr->Queue); 84330d8983SJohannes Doerfert } 85330d8983SJohannes Doerfert 86330d8983SJohannes Doerfert /// Set the queue. 87330d8983SJohannes Doerfert template <typename Ty> void setQueueAs(Ty Queue) { 88330d8983SJohannes Doerfert static_assert(sizeof(Ty) == sizeof(AsyncInfoPtr->Queue), 89330d8983SJohannes Doerfert "Queue is not of the same size as target type"); 90330d8983SJohannes Doerfert assert(!AsyncInfoPtr->Queue && "Overwriting queue"); 91330d8983SJohannes Doerfert AsyncInfoPtr->Queue = Queue; 92330d8983SJohannes Doerfert } 93330d8983SJohannes Doerfert 94330d8983SJohannes Doerfert /// Synchronize with the __tgt_async_info's pending operations if it's the 95330d8983SJohannes Doerfert /// internal async info. The error associated to the aysnchronous operations 96330d8983SJohannes Doerfert /// issued in this queue must be provided in \p Err. 
This function will update 97330d8983SJohannes Doerfert /// the error parameter with the result of the synchronization if it was 98330d8983SJohannes Doerfert /// actually executed. This function must be called before destroying the 99330d8983SJohannes Doerfert /// object and only once. 100330d8983SJohannes Doerfert void finalize(Error &Err); 101330d8983SJohannes Doerfert 102330d8983SJohannes Doerfert /// Register \p Ptr as an associated alloction that is freed after 103330d8983SJohannes Doerfert /// finalization. 104330d8983SJohannes Doerfert void freeAllocationAfterSynchronization(void *Ptr) { 105330d8983SJohannes Doerfert AsyncInfoPtr->AssociatedAllocations.push_back(Ptr); 106330d8983SJohannes Doerfert } 107330d8983SJohannes Doerfert 108330d8983SJohannes Doerfert private: 109330d8983SJohannes Doerfert GenericDeviceTy &Device; 110330d8983SJohannes Doerfert __tgt_async_info LocalAsyncInfo; 111330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr; 112330d8983SJohannes Doerfert }; 113330d8983SJohannes Doerfert 114330d8983SJohannes Doerfert /// The information level represents the level of a key-value property in the 115330d8983SJohannes Doerfert /// info tree print (i.e. indentation). The first level should be the default. 116330d8983SJohannes Doerfert enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 }; 117330d8983SJohannes Doerfert 118330d8983SJohannes Doerfert /// Class for storing device information and later be printed. An object of this 119330d8983SJohannes Doerfert /// type acts as a queue of key-value properties. Each property has a key, a 120330d8983SJohannes Doerfert /// a value, and an optional unit for the value. For printing purposes, the 121330d8983SJohannes Doerfert /// information can be classified into several levels. These levels are useful 122330d8983SJohannes Doerfert /// for defining sections and subsections. 
Thus, each key-value property also 123330d8983SJohannes Doerfert /// has an additional field indicating to which level belongs to. Notice that 124330d8983SJohannes Doerfert /// we use the level to determine the indentation of the key-value property at 125330d8983SJohannes Doerfert /// printing time. See the enum InfoLevelKind for the list of accepted levels. 126330d8983SJohannes Doerfert class InfoQueueTy { 127fd3907ccSCallum Fare public: 128330d8983SJohannes Doerfert struct InfoQueueEntryTy { 129330d8983SJohannes Doerfert std::string Key; 130330d8983SJohannes Doerfert std::string Value; 131330d8983SJohannes Doerfert std::string Units; 132330d8983SJohannes Doerfert uint64_t Level; 133330d8983SJohannes Doerfert }; 134330d8983SJohannes Doerfert 135fd3907ccSCallum Fare private: 136330d8983SJohannes Doerfert std::deque<InfoQueueEntryTy> Queue; 137330d8983SJohannes Doerfert 138330d8983SJohannes Doerfert public: 139330d8983SJohannes Doerfert /// Add a new info entry to the queue. The entry requires at least a key 140330d8983SJohannes Doerfert /// string in \p Key. The value in \p Value is optional and can be any type 141330d8983SJohannes Doerfert /// that is representable as a string. The units in \p Units is optional and 142330d8983SJohannes Doerfert /// must be a string. The info level is a template parameter that defaults to 143330d8983SJohannes Doerfert /// the first level (top level). 144330d8983SJohannes Doerfert template <InfoLevelKind L = InfoLevel1, typename T = std::string> 145330d8983SJohannes Doerfert void add(const std::string &Key, T Value = T(), 146330d8983SJohannes Doerfert const std::string &Units = std::string()) { 147330d8983SJohannes Doerfert assert(!Key.empty() && "Invalid info key"); 148330d8983SJohannes Doerfert 149330d8983SJohannes Doerfert // Convert the value to a string depending on its type. 150330d8983SJohannes Doerfert if constexpr (std::is_same_v<T, bool>) 151330d8983SJohannes Doerfert Queue.push_back({Key, Value ? 
"Yes" : "No", Units, L}); 152330d8983SJohannes Doerfert else if constexpr (std::is_arithmetic_v<T>) 153330d8983SJohannes Doerfert Queue.push_back({Key, std::to_string(Value), Units, L}); 154330d8983SJohannes Doerfert else 155330d8983SJohannes Doerfert Queue.push_back({Key, Value, Units, L}); 156330d8983SJohannes Doerfert } 157330d8983SJohannes Doerfert 158fd3907ccSCallum Fare const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; } 159fd3907ccSCallum Fare 160330d8983SJohannes Doerfert /// Print all info entries added to the queue. 161330d8983SJohannes Doerfert void print() const { 162330d8983SJohannes Doerfert // We print four spances for each level. 163330d8983SJohannes Doerfert constexpr uint64_t IndentSize = 4; 164330d8983SJohannes Doerfert 165330d8983SJohannes Doerfert // Find the maximum key length (level + key) to compute the individual 166330d8983SJohannes Doerfert // indentation of each entry. 167330d8983SJohannes Doerfert uint64_t MaxKeySize = 0; 168330d8983SJohannes Doerfert for (const auto &Entry : Queue) { 169330d8983SJohannes Doerfert uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize; 170330d8983SJohannes Doerfert if (KeySize > MaxKeySize) 171330d8983SJohannes Doerfert MaxKeySize = KeySize; 172330d8983SJohannes Doerfert } 173330d8983SJohannes Doerfert 174330d8983SJohannes Doerfert // Print all info entries. 175330d8983SJohannes Doerfert for (const auto &Entry : Queue) { 176330d8983SJohannes Doerfert // Compute the indentations for the current entry. 177330d8983SJohannes Doerfert uint64_t KeyIndentSize = Entry.Level * IndentSize; 178330d8983SJohannes Doerfert uint64_t ValIndentSize = 179330d8983SJohannes Doerfert MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize; 180330d8983SJohannes Doerfert 181330d8983SJohannes Doerfert llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key 182330d8983SJohannes Doerfert << std::string(ValIndentSize, ' ') << Entry.Value 183330d8983SJohannes Doerfert << (Entry.Units.empty() ? 
"" : " ") << Entry.Units << "\n"; 184330d8983SJohannes Doerfert } 185330d8983SJohannes Doerfert } 186330d8983SJohannes Doerfert }; 187330d8983SJohannes Doerfert 188330d8983SJohannes Doerfert /// Class wrapping a __tgt_device_image and its offload entry table on a 189330d8983SJohannes Doerfert /// specific device. This class is responsible for storing and managing 190330d8983SJohannes Doerfert /// the offload entries for an image on a device. 191330d8983SJohannes Doerfert class DeviceImageTy { 192330d8983SJohannes Doerfert /// Image identifier within the corresponding device. Notice that this id is 193330d8983SJohannes Doerfert /// not unique between different device; they may overlap. 194330d8983SJohannes Doerfert int32_t ImageId; 195330d8983SJohannes Doerfert 196330d8983SJohannes Doerfert /// The pointer to the raw __tgt_device_image. 197330d8983SJohannes Doerfert const __tgt_device_image *TgtImage; 198330d8983SJohannes Doerfert const __tgt_device_image *TgtImageBitcode; 199330d8983SJohannes Doerfert 200330d8983SJohannes Doerfert /// Reference to the device this image is loaded on. 201330d8983SJohannes Doerfert GenericDeviceTy &Device; 202330d8983SJohannes Doerfert 203330d8983SJohannes Doerfert /// If this image has any global destructors that much be called. 204330d8983SJohannes Doerfert /// FIXME: This is only required because we currently have no invariants 205330d8983SJohannes Doerfert /// towards the lifetime of the underlying image. We should either copy 206330d8983SJohannes Doerfert /// the image into memory locally or erase the pointers after init. 
207330d8983SJohannes Doerfert bool PendingGlobalDtors; 208330d8983SJohannes Doerfert 209330d8983SJohannes Doerfert public: 210330d8983SJohannes Doerfert DeviceImageTy(int32_t Id, GenericDeviceTy &Device, 211330d8983SJohannes Doerfert const __tgt_device_image *Image) 212330d8983SJohannes Doerfert : ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr), Device(Device), 213330d8983SJohannes Doerfert PendingGlobalDtors(false) { 214330d8983SJohannes Doerfert assert(TgtImage && "Invalid target image"); 215330d8983SJohannes Doerfert } 216330d8983SJohannes Doerfert 217330d8983SJohannes Doerfert /// Get the image identifier within the device. 218330d8983SJohannes Doerfert int32_t getId() const { return ImageId; } 219330d8983SJohannes Doerfert 220330d8983SJohannes Doerfert /// Get the device that this image is loaded onto. 221330d8983SJohannes Doerfert GenericDeviceTy &getDevice() const { return Device; } 222330d8983SJohannes Doerfert 223330d8983SJohannes Doerfert /// Get the pointer to the raw __tgt_device_image. 224330d8983SJohannes Doerfert const __tgt_device_image *getTgtImage() const { return TgtImage; } 225330d8983SJohannes Doerfert 226330d8983SJohannes Doerfert void setTgtImageBitcode(const __tgt_device_image *TgtImageBitcode) { 227330d8983SJohannes Doerfert this->TgtImageBitcode = TgtImageBitcode; 228330d8983SJohannes Doerfert } 229330d8983SJohannes Doerfert 230330d8983SJohannes Doerfert const __tgt_device_image *getTgtImageBitcode() const { 231330d8983SJohannes Doerfert return TgtImageBitcode; 232330d8983SJohannes Doerfert } 233330d8983SJohannes Doerfert 234330d8983SJohannes Doerfert /// Get the image starting address. 235330d8983SJohannes Doerfert void *getStart() const { return TgtImage->ImageStart; } 236330d8983SJohannes Doerfert 237330d8983SJohannes Doerfert /// Get the image size. 
238330d8983SJohannes Doerfert size_t getSize() const { 23908533a3eSJohannes Doerfert return utils::getPtrDiff(TgtImage->ImageEnd, TgtImage->ImageStart); 240330d8983SJohannes Doerfert } 241330d8983SJohannes Doerfert 242330d8983SJohannes Doerfert /// Get a memory buffer reference to the whole image. 243330d8983SJohannes Doerfert MemoryBufferRef getMemoryBuffer() const { 244330d8983SJohannes Doerfert return MemoryBufferRef(StringRef((const char *)getStart(), getSize()), 245330d8983SJohannes Doerfert "Image"); 246330d8983SJohannes Doerfert } 247330d8983SJohannes Doerfert /// Accessors to the boolean value 248330d8983SJohannes Doerfert bool setPendingGlobalDtors() { return PendingGlobalDtors = true; } 249330d8983SJohannes Doerfert bool hasPendingGlobalDtors() const { return PendingGlobalDtors; } 250330d8983SJohannes Doerfert }; 251330d8983SJohannes Doerfert 252330d8983SJohannes Doerfert /// Class implementing common functionalities of offload kernels. Each plugin 253330d8983SJohannes Doerfert /// should define the specific kernel class, derive from this generic one, and 254330d8983SJohannes Doerfert /// implement the necessary virtual function members. 255330d8983SJohannes Doerfert struct GenericKernelTy { 256330d8983SJohannes Doerfert /// Construct a kernel with a name and a execution mode. 257330d8983SJohannes Doerfert GenericKernelTy(const char *Name) 258330d8983SJohannes Doerfert : Name(Name), PreferredNumThreads(0), MaxNumThreads(0) {} 259330d8983SJohannes Doerfert 260330d8983SJohannes Doerfert virtual ~GenericKernelTy() {} 261330d8983SJohannes Doerfert 262330d8983SJohannes Doerfert /// Initialize the kernel object from a specific device. 
263330d8983SJohannes Doerfert Error init(GenericDeviceTy &GenericDevice, DeviceImageTy &Image); 264330d8983SJohannes Doerfert virtual Error initImpl(GenericDeviceTy &GenericDevice, 265330d8983SJohannes Doerfert DeviceImageTy &Image) = 0; 266330d8983SJohannes Doerfert 267330d8983SJohannes Doerfert /// Launch the kernel on the specific device. The device must be the same 268330d8983SJohannes Doerfert /// one used to initialize the kernel. 269330d8983SJohannes Doerfert Error launch(GenericDeviceTy &GenericDevice, void **ArgPtrs, 270330d8983SJohannes Doerfert ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs, 271330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) const; 272*92376c3fSShilei Tian virtual Error launchImpl(GenericDeviceTy &GenericDevice, 273*92376c3fSShilei Tian uint32_t NumThreads[3], uint32_t NumBlocks[3], 274*92376c3fSShilei Tian KernelArgsTy &KernelArgs, 27554b5c76dSJohannes Doerfert KernelLaunchParamsTy LaunchParams, 276330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0; 277330d8983SJohannes Doerfert 278330d8983SJohannes Doerfert /// Get the kernel name. 279330d8983SJohannes Doerfert const char *getName() const { return Name; } 280330d8983SJohannes Doerfert 281330d8983SJohannes Doerfert /// Get the kernel image. 282330d8983SJohannes Doerfert DeviceImageTy &getImage() const { 283330d8983SJohannes Doerfert assert(ImagePtr && "Kernel is not initialized!"); 284330d8983SJohannes Doerfert return *ImagePtr; 285330d8983SJohannes Doerfert } 286330d8983SJohannes Doerfert 287330d8983SJohannes Doerfert /// Return the kernel environment object for kernel \p Name. 288330d8983SJohannes Doerfert const KernelEnvironmentTy &getKernelEnvironmentForKernel() { 289330d8983SJohannes Doerfert return KernelEnvironment; 290330d8983SJohannes Doerfert } 291330d8983SJohannes Doerfert 292330d8983SJohannes Doerfert /// Return a device pointer to a new kernel launch environment. 
293330d8983SJohannes Doerfert Expected<KernelLaunchEnvironmentTy *> 294330d8983SJohannes Doerfert getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice, uint32_t Version, 295330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfo) const; 296330d8983SJohannes Doerfert 297330d8983SJohannes Doerfert /// Indicate whether an execution mode is valid. 298330d8983SJohannes Doerfert static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) { 299330d8983SJohannes Doerfert switch (ExecutionMode) { 300330d8983SJohannes Doerfert case OMP_TGT_EXEC_MODE_SPMD: 301330d8983SJohannes Doerfert case OMP_TGT_EXEC_MODE_GENERIC: 302330d8983SJohannes Doerfert case OMP_TGT_EXEC_MODE_GENERIC_SPMD: 303330d8983SJohannes Doerfert return true; 304330d8983SJohannes Doerfert } 305330d8983SJohannes Doerfert return false; 306330d8983SJohannes Doerfert } 307330d8983SJohannes Doerfert 308330d8983SJohannes Doerfert protected: 309330d8983SJohannes Doerfert /// Get the execution mode name of the kernel. 310330d8983SJohannes Doerfert const char *getExecutionModeName() const { 311330d8983SJohannes Doerfert switch (KernelEnvironment.Configuration.ExecMode) { 312330d8983SJohannes Doerfert case OMP_TGT_EXEC_MODE_SPMD: 313330d8983SJohannes Doerfert return "SPMD"; 314330d8983SJohannes Doerfert case OMP_TGT_EXEC_MODE_GENERIC: 315330d8983SJohannes Doerfert return "Generic"; 316330d8983SJohannes Doerfert case OMP_TGT_EXEC_MODE_GENERIC_SPMD: 317330d8983SJohannes Doerfert return "Generic-SPMD"; 318330d8983SJohannes Doerfert } 319330d8983SJohannes Doerfert llvm_unreachable("Unknown execution mode!"); 320330d8983SJohannes Doerfert } 321330d8983SJohannes Doerfert 322330d8983SJohannes Doerfert /// Prints generic kernel launch information. 
323330d8983SJohannes Doerfert Error printLaunchInfo(GenericDeviceTy &GenericDevice, 324*92376c3fSShilei Tian KernelArgsTy &KernelArgs, uint32_t NumThreads[3], 325*92376c3fSShilei Tian uint32_t NumBlocks[3]) const; 326330d8983SJohannes Doerfert 327330d8983SJohannes Doerfert /// Prints plugin-specific kernel launch information after generic kernel 328330d8983SJohannes Doerfert /// launch information 329330d8983SJohannes Doerfert virtual Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice, 330330d8983SJohannes Doerfert KernelArgsTy &KernelArgs, 331*92376c3fSShilei Tian uint32_t NumThreads[3], 332*92376c3fSShilei Tian uint32_t NumBlocks[3]) const; 333330d8983SJohannes Doerfert 334330d8983SJohannes Doerfert private: 335330d8983SJohannes Doerfert /// Prepare the arguments before launching the kernel. 33654b5c76dSJohannes Doerfert KernelLaunchParamsTy 33754b5c76dSJohannes Doerfert prepareArgs(GenericDeviceTy &GenericDevice, void **ArgPtrs, 338330d8983SJohannes Doerfert ptrdiff_t *ArgOffsets, uint32_t &NumArgs, 339330d8983SJohannes Doerfert llvm::SmallVectorImpl<void *> &Args, 340330d8983SJohannes Doerfert llvm::SmallVectorImpl<void *> &Ptrs, 341330d8983SJohannes Doerfert KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const; 342330d8983SJohannes Doerfert 343330d8983SJohannes Doerfert /// Get the number of threads and blocks for the kernel based on the 344330d8983SJohannes Doerfert /// user-defined threads and block clauses. 345330d8983SJohannes Doerfert uint32_t getNumThreads(GenericDeviceTy &GenericDevice, 346330d8983SJohannes Doerfert uint32_t ThreadLimitClause[3]) const; 347330d8983SJohannes Doerfert 348330d8983SJohannes Doerfert /// The number of threads \p NumThreads can be adjusted by this method. 349330d8983SJohannes Doerfert /// \p IsNumThreadsFromUser is true is \p NumThreads is defined by user via 350330d8983SJohannes Doerfert /// thread_limit clause. 
351*92376c3fSShilei Tian uint32_t getNumBlocks(GenericDeviceTy &GenericDevice, 352330d8983SJohannes Doerfert uint32_t BlockLimitClause[3], uint64_t LoopTripCount, 353330d8983SJohannes Doerfert uint32_t &NumThreads, bool IsNumThreadsFromUser) const; 354330d8983SJohannes Doerfert 355330d8983SJohannes Doerfert /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode. 356330d8983SJohannes Doerfert bool isGenericSPMDMode() const { 357330d8983SJohannes Doerfert return KernelEnvironment.Configuration.ExecMode == 358330d8983SJohannes Doerfert OMP_TGT_EXEC_MODE_GENERIC_SPMD; 359330d8983SJohannes Doerfert } 360330d8983SJohannes Doerfert bool isGenericMode() const { 361330d8983SJohannes Doerfert return KernelEnvironment.Configuration.ExecMode == 362330d8983SJohannes Doerfert OMP_TGT_EXEC_MODE_GENERIC; 363330d8983SJohannes Doerfert } 364330d8983SJohannes Doerfert bool isSPMDMode() const { 365330d8983SJohannes Doerfert return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_SPMD; 366330d8983SJohannes Doerfert } 367330d8983SJohannes Doerfert 368330d8983SJohannes Doerfert /// The kernel name. 369330d8983SJohannes Doerfert const char *Name; 370330d8983SJohannes Doerfert 371330d8983SJohannes Doerfert /// The image that contains this kernel. 372330d8983SJohannes Doerfert DeviceImageTy *ImagePtr = nullptr; 373330d8983SJohannes Doerfert 374330d8983SJohannes Doerfert protected: 375330d8983SJohannes Doerfert /// The preferred number of threads to run the kernel. 376330d8983SJohannes Doerfert uint32_t PreferredNumThreads; 377330d8983SJohannes Doerfert 378330d8983SJohannes Doerfert /// The maximum number of threads which the kernel could leverage. 379330d8983SJohannes Doerfert uint32_t MaxNumThreads; 380330d8983SJohannes Doerfert 381330d8983SJohannes Doerfert /// The kernel environment, including execution flags. 
382330d8983SJohannes Doerfert KernelEnvironmentTy KernelEnvironment; 383330d8983SJohannes Doerfert 384330d8983SJohannes Doerfert /// The prototype kernel launch environment. 385330d8983SJohannes Doerfert KernelLaunchEnvironmentTy KernelLaunchEnvironment; 386330d8983SJohannes Doerfert 387330d8983SJohannes Doerfert /// If the kernel is a bare kernel. 388330d8983SJohannes Doerfert bool IsBareKernel = false; 389330d8983SJohannes Doerfert }; 390330d8983SJohannes Doerfert 391c95abe94SJohannes Doerfert /// Information about an allocation, when it has been allocated, and when/if it 392c95abe94SJohannes Doerfert /// has been deallocated, for error reporting purposes. 393c95abe94SJohannes Doerfert struct AllocationTraceInfoTy { 394c95abe94SJohannes Doerfert 395c95abe94SJohannes Doerfert /// The stack trace of the allocation itself. 396c95abe94SJohannes Doerfert std::string AllocationTrace; 397c95abe94SJohannes Doerfert 398c95abe94SJohannes Doerfert /// The stack trace of the deallocation, or empty. 399c95abe94SJohannes Doerfert std::string DeallocationTrace; 400c95abe94SJohannes Doerfert 401c95abe94SJohannes Doerfert /// The allocated device pointer. 402c95abe94SJohannes Doerfert void *DevicePtr = nullptr; 403c95abe94SJohannes Doerfert 404c95abe94SJohannes Doerfert /// The corresponding host pointer (can be null). 405c95abe94SJohannes Doerfert void *HostPtr = nullptr; 406c95abe94SJohannes Doerfert 407c95abe94SJohannes Doerfert /// The size of the allocation. 408c95abe94SJohannes Doerfert uint64_t Size = 0; 409c95abe94SJohannes Doerfert 410c95abe94SJohannes Doerfert /// The kind of the allocation. 411c95abe94SJohannes Doerfert TargetAllocTy Kind = TargetAllocTy::TARGET_ALLOC_DEFAULT; 412c95abe94SJohannes Doerfert 413c95abe94SJohannes Doerfert /// Information about the last allocation at this address, if any. 
414c95abe94SJohannes Doerfert AllocationTraceInfoTy *LastAllocationInfo = nullptr; 415c95abe94SJohannes Doerfert 416c95abe94SJohannes Doerfert /// Lock to keep accesses race free. 417c95abe94SJohannes Doerfert std::mutex Lock; 418c95abe94SJohannes Doerfert }; 419c95abe94SJohannes Doerfert 4209a101322SJohannes Doerfert /// Information about an allocation, when it has been allocated, and when/if it 4219a101322SJohannes Doerfert /// has been deallocated, for error reporting purposes. 4229a101322SJohannes Doerfert struct KernelTraceInfoTy { 4239a101322SJohannes Doerfert 4249a101322SJohannes Doerfert /// The launched kernel. 4259a101322SJohannes Doerfert GenericKernelTy *Kernel; 4269a101322SJohannes Doerfert 4279a101322SJohannes Doerfert /// The stack trace of the launch itself. 4289a101322SJohannes Doerfert std::string LaunchTrace; 4299a101322SJohannes Doerfert 4309a101322SJohannes Doerfert /// The async info the kernel was launched in. 4319a101322SJohannes Doerfert __tgt_async_info *AsyncInfo; 4329a101322SJohannes Doerfert }; 4339a101322SJohannes Doerfert 4349a101322SJohannes Doerfert struct KernelTraceInfoRecordTy { 4359a101322SJohannes Doerfert KernelTraceInfoRecordTy() { KTIs.fill({}); } 4369a101322SJohannes Doerfert 4379a101322SJohannes Doerfert /// Return the (maximal) record size. 4389a101322SJohannes Doerfert auto size() const { return KTIs.size(); } 4399a101322SJohannes Doerfert 4409a101322SJohannes Doerfert /// Create a new kernel trace info and add it into the record. 4419a101322SJohannes Doerfert void emplace(GenericKernelTy *Kernel, const std::string &&StackTrace, 4429a101322SJohannes Doerfert __tgt_async_info *AsyncInfo) { 4439a101322SJohannes Doerfert KTIs[Idx] = {Kernel, std::move(StackTrace), AsyncInfo}; 4449a101322SJohannes Doerfert Idx = (Idx + 1) % size(); 4459a101322SJohannes Doerfert } 4469a101322SJohannes Doerfert 4479a101322SJohannes Doerfert /// Return the \p I'th last kernel trace info. 
4489a101322SJohannes Doerfert auto getKernelTraceInfo(int32_t I) const { 4499a101322SJohannes Doerfert // Note that kernel trace infos "grow forward", so lookup is backwards. 4509a101322SJohannes Doerfert return KTIs[(Idx - I - 1 + size()) % size()]; 4519a101322SJohannes Doerfert } 4529a101322SJohannes Doerfert 4539a101322SJohannes Doerfert private: 4549a101322SJohannes Doerfert std::array<KernelTraceInfoTy, 8> KTIs; 4559a101322SJohannes Doerfert unsigned Idx = 0; 4569a101322SJohannes Doerfert }; 4579a101322SJohannes Doerfert 458330d8983SJohannes Doerfert /// Class representing a map of host pinned allocations. We track these pinned 459330d8983SJohannes Doerfert /// allocations, so memory tranfers invloving these buffers can be optimized. 460330d8983SJohannes Doerfert class PinnedAllocationMapTy { 461330d8983SJohannes Doerfert 462330d8983SJohannes Doerfert /// Struct representing a map entry. 463330d8983SJohannes Doerfert struct EntryTy { 464330d8983SJohannes Doerfert /// The host pointer of the pinned allocation. 465330d8983SJohannes Doerfert void *HstPtr; 466330d8983SJohannes Doerfert 467330d8983SJohannes Doerfert /// The pointer that devices' driver should use to transfer data from/to the 468330d8983SJohannes Doerfert /// pinned allocation. In most plugins, this pointer will be the same as the 469330d8983SJohannes Doerfert /// host pointer above. 470330d8983SJohannes Doerfert void *DevAccessiblePtr; 471330d8983SJohannes Doerfert 472330d8983SJohannes Doerfert /// The size of the pinned allocation. 473330d8983SJohannes Doerfert size_t Size; 474330d8983SJohannes Doerfert 475330d8983SJohannes Doerfert /// Indicate whether the allocation was locked from outside the plugin, for 476330d8983SJohannes Doerfert /// instance, from the application. The externally locked allocations are 477330d8983SJohannes Doerfert /// not unlocked by the plugin when unregistering the last user. 
478330d8983SJohannes Doerfert bool ExternallyLocked; 479330d8983SJohannes Doerfert 480330d8983SJohannes Doerfert /// The number of references to the pinned allocation. The allocation should 481330d8983SJohannes Doerfert /// remain pinned and registered to the map until the number of references 482330d8983SJohannes Doerfert /// becomes zero. 483330d8983SJohannes Doerfert mutable size_t References; 484330d8983SJohannes Doerfert 485330d8983SJohannes Doerfert /// Create an entry with the host and device acessible pointers, the buffer 486330d8983SJohannes Doerfert /// size, and a boolean indicating whether the buffer was locked externally. 487330d8983SJohannes Doerfert EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size, 488330d8983SJohannes Doerfert bool ExternallyLocked) 489330d8983SJohannes Doerfert : HstPtr(HstPtr), DevAccessiblePtr(DevAccessiblePtr), Size(Size), 490330d8983SJohannes Doerfert ExternallyLocked(ExternallyLocked), References(1) {} 491330d8983SJohannes Doerfert 492330d8983SJohannes Doerfert /// Utility constructor used for std::set searches. 493330d8983SJohannes Doerfert EntryTy(void *HstPtr) 494330d8983SJohannes Doerfert : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0), 495330d8983SJohannes Doerfert ExternallyLocked(false), References(0) {} 496330d8983SJohannes Doerfert }; 497330d8983SJohannes Doerfert 498330d8983SJohannes Doerfert /// Comparator of mep entries. Use the host pointer to enforce an order 499330d8983SJohannes Doerfert /// between entries. 500330d8983SJohannes Doerfert struct EntryCmpTy { 501330d8983SJohannes Doerfert bool operator()(const EntryTy &Left, const EntryTy &Right) const { 502330d8983SJohannes Doerfert return Left.HstPtr < Right.HstPtr; 503330d8983SJohannes Doerfert } 504330d8983SJohannes Doerfert }; 505330d8983SJohannes Doerfert 506330d8983SJohannes Doerfert typedef std::set<EntryTy, EntryCmpTy> PinnedAllocSetTy; 507330d8983SJohannes Doerfert 508330d8983SJohannes Doerfert /// The map of host pinned allocations. 
509330d8983SJohannes Doerfert PinnedAllocSetTy Allocs; 510330d8983SJohannes Doerfert 511330d8983SJohannes Doerfert /// The mutex to protect accesses to the map. 512330d8983SJohannes Doerfert mutable std::shared_mutex Mutex; 513330d8983SJohannes Doerfert 514330d8983SJohannes Doerfert /// Reference to the corresponding device. 515330d8983SJohannes Doerfert GenericDeviceTy &Device; 516330d8983SJohannes Doerfert 517330d8983SJohannes Doerfert /// Indicate whether mapped host buffers should be locked automatically. 518330d8983SJohannes Doerfert bool LockMappedBuffers; 519330d8983SJohannes Doerfert 520330d8983SJohannes Doerfert /// Indicate whether failures when locking mapped buffers should be ingored. 521330d8983SJohannes Doerfert bool IgnoreLockMappedFailures; 522330d8983SJohannes Doerfert 523330d8983SJohannes Doerfert /// Find an allocation that intersects with \p HstPtr pointer. Assume the 524330d8983SJohannes Doerfert /// map's mutex is acquired. 525330d8983SJohannes Doerfert const EntryTy *findIntersecting(const void *HstPtr) const { 526330d8983SJohannes Doerfert if (Allocs.empty()) 527330d8983SJohannes Doerfert return nullptr; 528330d8983SJohannes Doerfert 529330d8983SJohannes Doerfert // Search the first allocation with starting address that is not less than 530330d8983SJohannes Doerfert // the buffer address. 531330d8983SJohannes Doerfert auto It = Allocs.lower_bound({const_cast<void *>(HstPtr)}); 532330d8983SJohannes Doerfert 533330d8983SJohannes Doerfert // Direct match of starting addresses. 534330d8983SJohannes Doerfert if (It != Allocs.end() && It->HstPtr == HstPtr) 535330d8983SJohannes Doerfert return &(*It); 536330d8983SJohannes Doerfert 537330d8983SJohannes Doerfert // Not direct match but may be a previous pinned allocation in the map which 538330d8983SJohannes Doerfert // contains the buffer. Return false if there is no such a previous 539330d8983SJohannes Doerfert // allocation. 
540330d8983SJohannes Doerfert if (It == Allocs.begin()) 541330d8983SJohannes Doerfert return nullptr; 542330d8983SJohannes Doerfert 543330d8983SJohannes Doerfert // Move to the previous pinned allocation. 544330d8983SJohannes Doerfert --It; 545330d8983SJohannes Doerfert 546330d8983SJohannes Doerfert // The buffer is not contained in the pinned allocation. 54708533a3eSJohannes Doerfert if (utils::advancePtr(It->HstPtr, It->Size) > HstPtr) 548330d8983SJohannes Doerfert return &(*It); 549330d8983SJohannes Doerfert 550330d8983SJohannes Doerfert // None found. 551330d8983SJohannes Doerfert return nullptr; 552330d8983SJohannes Doerfert } 553330d8983SJohannes Doerfert 554330d8983SJohannes Doerfert /// Insert an entry to the map representing a locked buffer. The number of 555330d8983SJohannes Doerfert /// references is set to one. 556330d8983SJohannes Doerfert Error insertEntry(void *HstPtr, void *DevAccessiblePtr, size_t Size, 557330d8983SJohannes Doerfert bool ExternallyLocked = false); 558330d8983SJohannes Doerfert 559330d8983SJohannes Doerfert /// Erase an existing entry from the map. 560330d8983SJohannes Doerfert Error eraseEntry(const EntryTy &Entry); 561330d8983SJohannes Doerfert 562330d8983SJohannes Doerfert /// Register a new user into an entry that represents a locked buffer. Check 563330d8983SJohannes Doerfert /// also that the registered buffer with \p HstPtr address and \p Size is 564330d8983SJohannes Doerfert /// actually contained into the entry. 565330d8983SJohannes Doerfert Error registerEntryUse(const EntryTy &Entry, void *HstPtr, size_t Size); 566330d8983SJohannes Doerfert 567330d8983SJohannes Doerfert /// Unregister a user from the entry and return whether it is the last user. 568330d8983SJohannes Doerfert /// If it is the last user, the entry will have to be removed from the map 569330d8983SJohannes Doerfert /// and unlock the entry's host buffer (if necessary). 
570330d8983SJohannes Doerfert Expected<bool> unregisterEntryUse(const EntryTy &Entry); 571330d8983SJohannes Doerfert 572330d8983SJohannes Doerfert /// Indicate whether the first range A fully contains the second range B. 573330d8983SJohannes Doerfert static bool contains(void *PtrA, size_t SizeA, void *PtrB, size_t SizeB) { 57408533a3eSJohannes Doerfert void *EndA = utils::advancePtr(PtrA, SizeA); 57508533a3eSJohannes Doerfert void *EndB = utils::advancePtr(PtrB, SizeB); 576330d8983SJohannes Doerfert return (PtrB >= PtrA && EndB <= EndA); 577330d8983SJohannes Doerfert } 578330d8983SJohannes Doerfert 579330d8983SJohannes Doerfert /// Indicate whether the first range A intersects with the second range B. 580330d8983SJohannes Doerfert static bool intersects(void *PtrA, size_t SizeA, void *PtrB, size_t SizeB) { 58108533a3eSJohannes Doerfert void *EndA = utils::advancePtr(PtrA, SizeA); 58208533a3eSJohannes Doerfert void *EndB = utils::advancePtr(PtrB, SizeB); 583330d8983SJohannes Doerfert return (PtrA < EndB && PtrB < EndA); 584330d8983SJohannes Doerfert } 585330d8983SJohannes Doerfert 586330d8983SJohannes Doerfert public: 587330d8983SJohannes Doerfert /// Create the map of pinned allocations corresponding to a specific device. 588330d8983SJohannes Doerfert PinnedAllocationMapTy(GenericDeviceTy &Device) : Device(Device) { 589330d8983SJohannes Doerfert 590330d8983SJohannes Doerfert // Envar that indicates whether mapped host buffers should be locked 591330d8983SJohannes Doerfert // automatically. The possible values are boolean (on/off) and a special: 592330d8983SJohannes Doerfert // off: Mapped host buffers are not locked. 593330d8983SJohannes Doerfert // on: Mapped host buffers are locked in a best-effort approach. 594330d8983SJohannes Doerfert // Failure to lock the buffers are silent. 595330d8983SJohannes Doerfert // mandatory: Mapped host buffers are always locked and failures to lock 596330d8983SJohannes Doerfert // a buffer results in a fatal error. 
597330d8983SJohannes Doerfert StringEnvar OMPX_LockMappedBuffers("LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS", 598330d8983SJohannes Doerfert "off"); 599330d8983SJohannes Doerfert 600330d8983SJohannes Doerfert bool Enabled; 601330d8983SJohannes Doerfert if (StringParser::parse(OMPX_LockMappedBuffers.get().data(), Enabled)) { 602330d8983SJohannes Doerfert // Parsed as a boolean value. Enable the feature if necessary. 603330d8983SJohannes Doerfert LockMappedBuffers = Enabled; 604330d8983SJohannes Doerfert IgnoreLockMappedFailures = true; 605330d8983SJohannes Doerfert } else if (OMPX_LockMappedBuffers.get() == "mandatory") { 606330d8983SJohannes Doerfert // Enable the feature and failures are fatal. 607330d8983SJohannes Doerfert LockMappedBuffers = true; 608330d8983SJohannes Doerfert IgnoreLockMappedFailures = false; 609330d8983SJohannes Doerfert } else { 610330d8983SJohannes Doerfert // Disable by default. 611330d8983SJohannes Doerfert DP("Invalid value LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS=%s\n", 612330d8983SJohannes Doerfert OMPX_LockMappedBuffers.get().data()); 613330d8983SJohannes Doerfert LockMappedBuffers = false; 614330d8983SJohannes Doerfert } 615330d8983SJohannes Doerfert } 616330d8983SJohannes Doerfert 617330d8983SJohannes Doerfert /// Register a buffer that was recently allocated as a locked host buffer. 618330d8983SJohannes Doerfert /// None of the already registered pinned allocations should intersect with 619330d8983SJohannes Doerfert /// this new one. The registration requires the host pointer in \p HstPtr, 620330d8983SJohannes Doerfert /// the device accessible pointer in \p DevAccessiblePtr, and the size of the 621330d8983SJohannes Doerfert /// allocation in \p Size. The allocation must be unregistered using the 622330d8983SJohannes Doerfert /// unregisterHostBuffer function. 
623330d8983SJohannes Doerfert Error registerHostBuffer(void *HstPtr, void *DevAccessiblePtr, size_t Size); 624330d8983SJohannes Doerfert 625330d8983SJohannes Doerfert /// Unregister a host pinned allocation passing the host pointer which was 626330d8983SJohannes Doerfert /// previously registered using the registerHostBuffer function. When calling 627330d8983SJohannes Doerfert /// this function, the pinned allocation cannot have any other user and will 628330d8983SJohannes Doerfert /// not be unlocked by this function. 629330d8983SJohannes Doerfert Error unregisterHostBuffer(void *HstPtr); 630330d8983SJohannes Doerfert 631330d8983SJohannes Doerfert /// Lock the host buffer at \p HstPtr or register a new user if it intersects 632330d8983SJohannes Doerfert /// with an already existing one. A partial overlapping with extension is not 633330d8983SJohannes Doerfert /// allowed. The function returns the device accessible pointer of the pinned 634330d8983SJohannes Doerfert /// buffer. The buffer must be unlocked using the unlockHostBuffer function. 635330d8983SJohannes Doerfert Expected<void *> lockHostBuffer(void *HstPtr, size_t Size); 636330d8983SJohannes Doerfert 637330d8983SJohannes Doerfert /// Unlock the host buffer at \p HstPtr or unregister a user if other users 638330d8983SJohannes Doerfert /// are still using the pinned allocation. If this was the last user, the 639330d8983SJohannes Doerfert /// pinned allocation is removed from the map and the memory is unlocked. 640330d8983SJohannes Doerfert Error unlockHostBuffer(void *HstPtr); 641330d8983SJohannes Doerfert 642330d8983SJohannes Doerfert /// Lock or register a host buffer that was recently mapped by libomptarget. 643330d8983SJohannes Doerfert /// This behavior is applied if LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS is 644330d8983SJohannes Doerfert /// enabled. Even if not enabled, externally locked buffers are registered 645330d8983SJohannes Doerfert /// in order to optimize their transfers. 
646330d8983SJohannes Doerfert Error lockMappedHostBuffer(void *HstPtr, size_t Size); 647330d8983SJohannes Doerfert 648330d8983SJohannes Doerfert /// Unlock or unregister a host buffer that was unmapped by libomptarget. 649330d8983SJohannes Doerfert Error unlockUnmappedHostBuffer(void *HstPtr); 650330d8983SJohannes Doerfert 651330d8983SJohannes Doerfert /// Return the device accessible pointer associated to the host pinned 652330d8983SJohannes Doerfert /// allocation which the \p HstPtr belongs, if any. Return null in case the 653330d8983SJohannes Doerfert /// \p HstPtr does not belong to any host pinned allocation. The device 654330d8983SJohannes Doerfert /// accessible pointer is the one that devices should use for data transfers 655330d8983SJohannes Doerfert /// that involve a host pinned buffer. 656330d8983SJohannes Doerfert void *getDeviceAccessiblePtrFromPinnedBuffer(const void *HstPtr) const { 657330d8983SJohannes Doerfert std::shared_lock<std::shared_mutex> Lock(Mutex); 658330d8983SJohannes Doerfert 659330d8983SJohannes Doerfert // Find the intersecting allocation if any. 660330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 661330d8983SJohannes Doerfert if (!Entry) 662330d8983SJohannes Doerfert return nullptr; 663330d8983SJohannes Doerfert 66408533a3eSJohannes Doerfert return utils::advancePtr(Entry->DevAccessiblePtr, 66508533a3eSJohannes Doerfert utils::getPtrDiff(HstPtr, Entry->HstPtr)); 666330d8983SJohannes Doerfert } 667330d8983SJohannes Doerfert 668330d8983SJohannes Doerfert /// Check whether a buffer belongs to a registered host pinned allocation. 669330d8983SJohannes Doerfert bool isHostPinnedBuffer(const void *HstPtr) const { 670330d8983SJohannes Doerfert std::shared_lock<std::shared_mutex> Lock(Mutex); 671330d8983SJohannes Doerfert 672330d8983SJohannes Doerfert // Return whether there is an intersecting allocation. 
673330d8983SJohannes Doerfert return (findIntersecting(const_cast<void *>(HstPtr)) != nullptr); 674330d8983SJohannes Doerfert } 675330d8983SJohannes Doerfert }; 676330d8983SJohannes Doerfert 677330d8983SJohannes Doerfert /// Class implementing common functionalities of offload devices. Each plugin 678330d8983SJohannes Doerfert /// should define the specific device class, derive from this generic one, and 679330d8983SJohannes Doerfert /// implement the necessary virtual function members. 680330d8983SJohannes Doerfert struct GenericDeviceTy : public DeviceAllocatorTy { 681330d8983SJohannes Doerfert /// Construct a device with its device id within the plugin, the number of 682330d8983SJohannes Doerfert /// devices in the plugin and the grid values for that kind of device. 683330d8983SJohannes Doerfert GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices, 684330d8983SJohannes Doerfert const llvm::omp::GV &GridValues); 685330d8983SJohannes Doerfert 686330d8983SJohannes Doerfert /// Get the device identifier within the corresponding plugin. Notice that 687330d8983SJohannes Doerfert /// this id is not unique between different plugins; they may overlap. 688330d8983SJohannes Doerfert int32_t getDeviceId() const { return DeviceId; } 689330d8983SJohannes Doerfert 690330d8983SJohannes Doerfert /// Set the context of the device if needed, before calling device-specific 691330d8983SJohannes Doerfert /// functions. Plugins may implement this function as a no-op if not needed. 692330d8983SJohannes Doerfert virtual Error setContext() = 0; 693330d8983SJohannes Doerfert 694330d8983SJohannes Doerfert /// Initialize the device. After this call, the device should be already 695330d8983SJohannes Doerfert /// working and ready to accept queries or modifications. 
696330d8983SJohannes Doerfert Error init(GenericPluginTy &Plugin); 697330d8983SJohannes Doerfert virtual Error initImpl(GenericPluginTy &Plugin) = 0; 698330d8983SJohannes Doerfert 699330d8983SJohannes Doerfert /// Deinitialize the device and free all its resources. After this call, the 700330d8983SJohannes Doerfert /// device is no longer considered ready, so no queries or modifications are 701330d8983SJohannes Doerfert /// allowed. 702330d8983SJohannes Doerfert Error deinit(GenericPluginTy &Plugin); 703330d8983SJohannes Doerfert virtual Error deinitImpl() = 0; 704330d8983SJohannes Doerfert 705330d8983SJohannes Doerfert /// Load the binary image into the device and return the target table. 706330d8983SJohannes Doerfert Expected<DeviceImageTy *> loadBinary(GenericPluginTy &Plugin, 707330d8983SJohannes Doerfert const __tgt_device_image *TgtImage); 708330d8983SJohannes Doerfert virtual Expected<DeviceImageTy *> 709330d8983SJohannes Doerfert loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0; 710330d8983SJohannes Doerfert 711330d8983SJohannes Doerfert /// Setup the device environment if needed. Notice this setup may not be run 712330d8983SJohannes Doerfert /// on some plugins. By default, it will be executed, but plugins can change 713330d8983SJohannes Doerfert /// this behavior by overriding the shouldSetupDeviceEnvironment function. 714330d8983SJohannes Doerfert Error setupDeviceEnvironment(GenericPluginTy &Plugin, DeviceImageTy &Image); 715330d8983SJohannes Doerfert 716330d8983SJohannes Doerfert /// Setup the global device memory pool, if the plugin requires one. 717330d8983SJohannes Doerfert Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image, 718330d8983SJohannes Doerfert uint64_t PoolSize); 719330d8983SJohannes Doerfert 720330d8983SJohannes Doerfert // Setup the RPC server for this device if needed. This may not run on some 721330d8983SJohannes Doerfert // plugins like the CPU targets. 
By default, it will not be executed so it is 722330d8983SJohannes Doerfert // up to the target to override this using the shouldSetupRPCServer function. 723330d8983SJohannes Doerfert Error setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image); 724330d8983SJohannes Doerfert 725330d8983SJohannes Doerfert /// Synchronize the current thread with the pending operations on the 726330d8983SJohannes Doerfert /// __tgt_async_info structure. 727330d8983SJohannes Doerfert Error synchronize(__tgt_async_info *AsyncInfo); 728330d8983SJohannes Doerfert virtual Error synchronizeImpl(__tgt_async_info &AsyncInfo) = 0; 729330d8983SJohannes Doerfert 730330d8983SJohannes Doerfert /// Invokes any global constructors on the device if present and is required 731330d8983SJohannes Doerfert /// by the target. 732330d8983SJohannes Doerfert virtual Error callGlobalConstructors(GenericPluginTy &Plugin, 733330d8983SJohannes Doerfert DeviceImageTy &Image) { 734330d8983SJohannes Doerfert return Error::success(); 735330d8983SJohannes Doerfert } 736330d8983SJohannes Doerfert 737330d8983SJohannes Doerfert /// Invokes any global destructors on the device if present and is required 738330d8983SJohannes Doerfert /// by the target. 739330d8983SJohannes Doerfert virtual Error callGlobalDestructors(GenericPluginTy &Plugin, 740330d8983SJohannes Doerfert DeviceImageTy &Image) { 741330d8983SJohannes Doerfert return Error::success(); 742330d8983SJohannes Doerfert } 743330d8983SJohannes Doerfert 744330d8983SJohannes Doerfert /// Query for the completion of the pending operations on the __tgt_async_info 745330d8983SJohannes Doerfert /// structure in a non-blocking manner. 
746330d8983SJohannes Doerfert Error queryAsync(__tgt_async_info *AsyncInfo); 747330d8983SJohannes Doerfert virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo) = 0; 748330d8983SJohannes Doerfert 749330d8983SJohannes Doerfert /// Check whether the architecture supports VA management 750330d8983SJohannes Doerfert virtual bool supportVAManagement() const { return false; } 751330d8983SJohannes Doerfert 752330d8983SJohannes Doerfert /// Get the total device memory size 753330d8983SJohannes Doerfert virtual Error getDeviceMemorySize(uint64_t &DSize); 754330d8983SJohannes Doerfert 755330d8983SJohannes Doerfert /// Allocates \p RSize bytes (rounded up to page size) and hints the driver to 756330d8983SJohannes Doerfert /// map it to \p VAddr. The obtained address is stored in \p Addr. At return 757330d8983SJohannes Doerfert /// \p RSize contains the actual size which can be equal or larger than the 758330d8983SJohannes Doerfert /// requested size. 759330d8983SJohannes Doerfert virtual Error memoryVAMap(void **Addr, void *VAddr, size_t *RSize); 760330d8983SJohannes Doerfert 761330d8983SJohannes Doerfert /// De-allocates device memory and unmaps the virtual address \p VAddr 762330d8983SJohannes Doerfert virtual Error memoryVAUnMap(void *VAddr, size_t Size); 763330d8983SJohannes Doerfert 764330d8983SJohannes Doerfert /// Allocate data on the device or involving the device. 765330d8983SJohannes Doerfert Expected<void *> dataAlloc(int64_t Size, void *HostPtr, TargetAllocTy Kind); 766330d8983SJohannes Doerfert 767330d8983SJohannes Doerfert /// Deallocate data from the device or involving the device. 768330d8983SJohannes Doerfert Error dataDelete(void *TgtPtr, TargetAllocTy Kind); 769330d8983SJohannes Doerfert 770330d8983SJohannes Doerfert /// Pin host memory to optimize transfers and return the device accessible 771330d8983SJohannes Doerfert /// pointer that devices should use for memory transfers involving the host 772330d8983SJohannes Doerfert /// pinned allocation. 
773330d8983SJohannes Doerfert Expected<void *> dataLock(void *HstPtr, int64_t Size) { 774330d8983SJohannes Doerfert return PinnedAllocs.lockHostBuffer(HstPtr, Size); 775330d8983SJohannes Doerfert } 776330d8983SJohannes Doerfert 777330d8983SJohannes Doerfert /// Unpin a host memory buffer that was previously pinned. 778330d8983SJohannes Doerfert Error dataUnlock(void *HstPtr) { 779330d8983SJohannes Doerfert return PinnedAllocs.unlockHostBuffer(HstPtr); 780330d8983SJohannes Doerfert } 781330d8983SJohannes Doerfert 782330d8983SJohannes Doerfert /// Lock the host buffer \p HstPtr with \p Size bytes with the vendor-specific 783330d8983SJohannes Doerfert /// API and return the device accessible pointer. 784330d8983SJohannes Doerfert virtual Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) = 0; 785330d8983SJohannes Doerfert 786330d8983SJohannes Doerfert /// Unlock a previously locked host buffer starting at \p HstPtr. 787330d8983SJohannes Doerfert virtual Error dataUnlockImpl(void *HstPtr) = 0; 788330d8983SJohannes Doerfert 789330d8983SJohannes Doerfert /// Mark the host buffer with address \p HstPtr and \p Size bytes as a mapped 790330d8983SJohannes Doerfert /// buffer. This means that libomptarget created a new mapping of that host 791330d8983SJohannes Doerfert /// buffer (e.g., because a user OpenMP target map) and the buffer may be used 792330d8983SJohannes Doerfert /// as source/destination of memory transfers. We can use this information to 793330d8983SJohannes Doerfert /// lock the host buffer and optimize its memory transfers. 794330d8983SJohannes Doerfert Error notifyDataMapped(void *HstPtr, int64_t Size) { 795330d8983SJohannes Doerfert return PinnedAllocs.lockMappedHostBuffer(HstPtr, Size); 796330d8983SJohannes Doerfert } 797330d8983SJohannes Doerfert 798330d8983SJohannes Doerfert /// Mark the host buffer with address \p HstPtr as unmapped. This means that 799330d8983SJohannes Doerfert /// libomptarget removed an existing mapping. 
If the plugin locked the buffer 800330d8983SJohannes Doerfert /// in notifyDataMapped, this function should unlock it. 801330d8983SJohannes Doerfert Error notifyDataUnmapped(void *HstPtr) { 802330d8983SJohannes Doerfert return PinnedAllocs.unlockUnmappedHostBuffer(HstPtr); 803330d8983SJohannes Doerfert } 804330d8983SJohannes Doerfert 805330d8983SJohannes Doerfert /// Check whether the host buffer with address \p HstPtr is pinned by the 806330d8983SJohannes Doerfert /// underlying vendor-specific runtime (if any). Retrieve the host pointer, 807330d8983SJohannes Doerfert /// the device accessible pointer and the size of the original pinned buffer. 808330d8983SJohannes Doerfert virtual Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, 809330d8983SJohannes Doerfert void *&BaseDevAccessiblePtr, 810330d8983SJohannes Doerfert size_t &BaseSize) const = 0; 811330d8983SJohannes Doerfert 812330d8983SJohannes Doerfert /// Submit data to the device (host to device transfer). 813330d8983SJohannes Doerfert Error dataSubmit(void *TgtPtr, const void *HstPtr, int64_t Size, 814330d8983SJohannes Doerfert __tgt_async_info *AsyncInfo); 815330d8983SJohannes Doerfert virtual Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, 816330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; 817330d8983SJohannes Doerfert 818330d8983SJohannes Doerfert /// Retrieve data from the device (device to host transfer). 819330d8983SJohannes Doerfert Error dataRetrieve(void *HstPtr, const void *TgtPtr, int64_t Size, 820330d8983SJohannes Doerfert __tgt_async_info *AsyncInfo); 821330d8983SJohannes Doerfert virtual Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, 822330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; 823330d8983SJohannes Doerfert 824330d8983SJohannes Doerfert /// Exchange data between devices (device to device transfer). 
Calling this 825330d8983SJohannes Doerfert /// function is only valid if GenericPlugin::isDataExchangable() passing the 826330d8983SJohannes Doerfert /// two devices returns true. 827330d8983SJohannes Doerfert Error dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev, void *DstPtr, 828330d8983SJohannes Doerfert int64_t Size, __tgt_async_info *AsyncInfo); 829330d8983SJohannes Doerfert virtual Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstDev, 830330d8983SJohannes Doerfert void *DstPtr, int64_t Size, 831330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; 832330d8983SJohannes Doerfert 833330d8983SJohannes Doerfert /// Run the kernel associated with \p EntryPtr 834330d8983SJohannes Doerfert Error launchKernel(void *EntryPtr, void **ArgPtrs, ptrdiff_t *ArgOffsets, 835330d8983SJohannes Doerfert KernelArgsTy &KernelArgs, __tgt_async_info *AsyncInfo); 836330d8983SJohannes Doerfert 837330d8983SJohannes Doerfert /// Initialize a __tgt_async_info structure. Related to interop features. 838330d8983SJohannes Doerfert Error initAsyncInfo(__tgt_async_info **AsyncInfoPtr); 839330d8983SJohannes Doerfert virtual Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; 840330d8983SJohannes Doerfert 841330d8983SJohannes Doerfert /// Initialize a __tgt_device_info structure. Related to interop features. 842330d8983SJohannes Doerfert Error initDeviceInfo(__tgt_device_info *DeviceInfo); 843330d8983SJohannes Doerfert virtual Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) = 0; 844330d8983SJohannes Doerfert 845330d8983SJohannes Doerfert /// Create an event. 846330d8983SJohannes Doerfert Error createEvent(void **EventPtrStorage); 847330d8983SJohannes Doerfert virtual Error createEventImpl(void **EventPtrStorage) = 0; 848330d8983SJohannes Doerfert 849330d8983SJohannes Doerfert /// Destroy an event. 
850330d8983SJohannes Doerfert Error destroyEvent(void *Event); 851330d8983SJohannes Doerfert virtual Error destroyEventImpl(void *EventPtr) = 0; 852330d8983SJohannes Doerfert 853330d8983SJohannes Doerfert /// Start the recording of the event. 854330d8983SJohannes Doerfert Error recordEvent(void *Event, __tgt_async_info *AsyncInfo); 855330d8983SJohannes Doerfert virtual Error recordEventImpl(void *EventPtr, 856330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; 857330d8983SJohannes Doerfert 858330d8983SJohannes Doerfert /// Wait for an event to finish. Notice this wait is asynchronous if the 859330d8983SJohannes Doerfert /// __tgt_async_info is not nullptr. 860330d8983SJohannes Doerfert Error waitEvent(void *Event, __tgt_async_info *AsyncInfo); 861330d8983SJohannes Doerfert virtual Error waitEventImpl(void *EventPtr, 862330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; 863330d8983SJohannes Doerfert 864330d8983SJohannes Doerfert /// Synchronize the current thread with the event. 865330d8983SJohannes Doerfert Error syncEvent(void *EventPtr); 866330d8983SJohannes Doerfert virtual Error syncEventImpl(void *EventPtr) = 0; 867330d8983SJohannes Doerfert 868330d8983SJohannes Doerfert /// Print information about the device. 869330d8983SJohannes Doerfert Error printInfo(); 870330d8983SJohannes Doerfert virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0; 871330d8983SJohannes Doerfert 872330d8983SJohannes Doerfert /// Getters of the grid values. 
873330d8983SJohannes Doerfert uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; } 874330d8983SJohannes Doerfert uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; } 875330d8983SJohannes Doerfert uint32_t getBlockLimit() const { return GridValues.GV_Max_Teams; } 876330d8983SJohannes Doerfert uint32_t getDefaultNumThreads() const { 877330d8983SJohannes Doerfert return GridValues.GV_Default_WG_Size; 878330d8983SJohannes Doerfert } 879330d8983SJohannes Doerfert uint32_t getDefaultNumBlocks() const { 880330d8983SJohannes Doerfert return GridValues.GV_Default_Num_Teams; 881330d8983SJohannes Doerfert } 882330d8983SJohannes Doerfert uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; } 883330d8983SJohannes Doerfert virtual uint64_t getClockFrequency() const { return CLOCKS_PER_SEC; } 884330d8983SJohannes Doerfert 885330d8983SJohannes Doerfert /// Get target compute unit kind (e.g., sm_80, or gfx908). 886330d8983SJohannes Doerfert virtual std::string getComputeUnitKind() const { return "unknown"; } 887330d8983SJohannes Doerfert 888330d8983SJohannes Doerfert /// Post processing after jit backend. The ownership of \p MB will be taken. 889330d8983SJohannes Doerfert virtual Expected<std::unique_ptr<MemoryBuffer>> 890330d8983SJohannes Doerfert doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const { 891330d8983SJohannes Doerfert return std::move(MB); 892330d8983SJohannes Doerfert } 893330d8983SJohannes Doerfert 894330d8983SJohannes Doerfert /// The minimum number of threads we use for a low-trip count combined loop. 895330d8983SJohannes Doerfert /// Instead of using more threads we increase the outer (block/team) 896330d8983SJohannes Doerfert /// parallelism. 
897330d8983SJohannes Doerfert /// @see OMPX_MinThreadsForLowTripCount 898330d8983SJohannes Doerfert virtual uint32_t getMinThreadsForLowTripCountLoop() { 899330d8983SJohannes Doerfert return OMPX_MinThreadsForLowTripCount; 900330d8983SJohannes Doerfert } 901330d8983SJohannes Doerfert 902597d2f76STim Gymnich /// Whether or not to reuse blocks for high trip count loops. 903597d2f76STim Gymnich /// @see OMPX_ReuseBlocksForHighTripCount 904597d2f76STim Gymnich bool getReuseBlocksForHighTripCount() { 905597d2f76STim Gymnich return OMPX_ReuseBlocksForHighTripCount; 906597d2f76STim Gymnich } 907597d2f76STim Gymnich 908330d8983SJohannes Doerfert /// Get the total amount of hardware parallelism supported by the target 909330d8983SJohannes Doerfert /// device. This is the total amount of warps or wavefronts that can be 910330d8983SJohannes Doerfert /// resident on the device simultaneously. 911330d8983SJohannes Doerfert virtual uint64_t getHardwareParallelism() const { return 0; } 912330d8983SJohannes Doerfert 913330d8983SJohannes Doerfert /// Get the RPC server running on this device. 914330d8983SJohannes Doerfert RPCServerTy *getRPCServer() const { return RPCServer; } 915330d8983SJohannes Doerfert 916330d8983SJohannes Doerfert /// The number of parallel RPC ports to use on the device. In general, this 917330d8983SJohannes Doerfert /// should be roughly equivalent to the amount of hardware parallelism the 918330d8983SJohannes Doerfert /// device can support. This is because GPUs in general do not have forward 919330d8983SJohannes Doerfert /// progress guarantees, so we minimize thread level dependencies by 920330d8983SJohannes Doerfert /// allocating enough space such that each device thread can have a port. This 921330d8983SJohannes Doerfert /// is likely overly pessimistic in the average case, but guarantees no 922330d8983SJohannes Doerfert /// deadlocks at the cost of memory. 
This must be overloaded by targets 923330d8983SJohannes Doerfert /// expecting to use the RPC server. 924330d8983SJohannes Doerfert virtual uint64_t requestedRPCPortCount() const { 925330d8983SJohannes Doerfert assert(!shouldSetupRPCServer() && "Default implementation cannot be used"); 926330d8983SJohannes Doerfert return 0; 927330d8983SJohannes Doerfert } 928330d8983SJohannes Doerfert 929330d8983SJohannes Doerfert virtual Error getDeviceStackSize(uint64_t &V) = 0; 930330d8983SJohannes Doerfert 931330d8983SJohannes Doerfert /// Returns true if current plugin architecture is an APU 932330d8983SJohannes Doerfert /// and unified_shared_memory was not requested by the program. 933330d8983SJohannes Doerfert bool useAutoZeroCopy(); 934330d8983SJohannes Doerfert virtual bool useAutoZeroCopyImpl() { return false; } 935330d8983SJohannes Doerfert 936330d8983SJohannes Doerfert /// Allocate and construct a kernel object. 937330d8983SJohannes Doerfert virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0; 938330d8983SJohannes Doerfert 939330d8983SJohannes Doerfert /// Reference to the underlying plugin that created this device. 940330d8983SJohannes Doerfert GenericPluginTy &Plugin; 941330d8983SJohannes Doerfert 942c95abe94SJohannes Doerfert /// Map to record when allocations have been performed, and when they have 943c95abe94SJohannes Doerfert /// been deallocated, both for error reporting purposes. 944c95abe94SJohannes Doerfert ProtectedObj<DenseMap<void *, AllocationTraceInfoTy *>> AllocationTraces; 945c95abe94SJohannes Doerfert 9463b761159SJohannes Doerfert /// Return the allocation trace info for a device pointer, that is the 9473b761159SJohannes Doerfert /// allocation into which this device pointer points to (or pointed into). 
9483b761159SJohannes Doerfert AllocationTraceInfoTy *getAllocationTraceInfoForAddr(void *DevicePtr) { 9493b761159SJohannes Doerfert auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor(); 9503b761159SJohannes Doerfert for (auto &It : *AllocationTraceMap) { 9513b761159SJohannes Doerfert if (It.first <= DevicePtr && 95208533a3eSJohannes Doerfert utils::advancePtr(It.first, It.second->Size) > DevicePtr) 9533b761159SJohannes Doerfert return It.second; 9543b761159SJohannes Doerfert } 9553b761159SJohannes Doerfert return nullptr; 9563b761159SJohannes Doerfert } 9573b761159SJohannes Doerfert 9583b761159SJohannes Doerfert /// Return the allocation trace info for a device pointer, that is the 9593b761159SJohannes Doerfert /// allocation into which this device pointer points to (or pointed into). 9603b761159SJohannes Doerfert AllocationTraceInfoTy * 9613b761159SJohannes Doerfert getClosestAllocationTraceInfoForAddr(void *DevicePtr, uintptr_t &Distance) { 9623b761159SJohannes Doerfert Distance = 0; 9633b761159SJohannes Doerfert if (auto *ATI = getAllocationTraceInfoForAddr(DevicePtr)) { 9643b761159SJohannes Doerfert return ATI; 9653b761159SJohannes Doerfert } 9663b761159SJohannes Doerfert 9673b761159SJohannes Doerfert AllocationTraceInfoTy *ATI = nullptr; 9683b761159SJohannes Doerfert uintptr_t DevicePtrI = uintptr_t(DevicePtr); 9693b761159SJohannes Doerfert auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor(); 9703b761159SJohannes Doerfert for (auto &It : *AllocationTraceMap) { 9713b761159SJohannes Doerfert uintptr_t Begin = uintptr_t(It.second->DevicePtr); 9723b761159SJohannes Doerfert uintptr_t End = Begin + It.second->Size - 1; 9733b761159SJohannes Doerfert uintptr_t ItDistance = std::min(Begin - DevicePtrI, DevicePtrI - End); 9743b761159SJohannes Doerfert if (ATI && ItDistance > Distance) 9753b761159SJohannes Doerfert continue; 9763b761159SJohannes Doerfert ATI = It.second; 9773b761159SJohannes Doerfert Distance = ItDistance; 9783b761159SJohannes 
Doerfert } 9793b761159SJohannes Doerfert return ATI; 9803b761159SJohannes Doerfert } 9813b761159SJohannes Doerfert 9829a101322SJohannes Doerfert /// Map to record kernel have been launchedl, for error reporting purposes. 9839a101322SJohannes Doerfert ProtectedObj<KernelTraceInfoRecordTy> KernelLaunchTraces; 9849a101322SJohannes Doerfert 9859a101322SJohannes Doerfert /// Environment variable to determine if stack traces for kernel launches are 9869a101322SJohannes Doerfert /// tracked. 9879a101322SJohannes Doerfert UInt32Envar OMPX_TrackNumKernelLaunches = 9889a101322SJohannes Doerfert UInt32Envar("OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES", 0); 9899a101322SJohannes Doerfert 9903b761159SJohannes Doerfert /// Environment variable to determine if stack traces for allocations and 9913b761159SJohannes Doerfert /// deallocations are tracked. 9923b761159SJohannes Doerfert BoolEnvar OMPX_TrackAllocationTraces = 9933b761159SJohannes Doerfert BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false); 9943b761159SJohannes Doerfert 995330d8983SJohannes Doerfert private: 996330d8983SJohannes Doerfert /// Get and set the stack size and heap size for the device. If not used, the 997330d8983SJohannes Doerfert /// plugin can implement the setters as no-op and setting the output 998330d8983SJohannes Doerfert /// value to zero for the getters. 999330d8983SJohannes Doerfert virtual Error setDeviceStackSize(uint64_t V) = 0; 1000330d8983SJohannes Doerfert virtual Error getDeviceHeapSize(uint64_t &V) = 0; 1001330d8983SJohannes Doerfert virtual Error setDeviceHeapSize(uint64_t V) = 0; 1002330d8983SJohannes Doerfert 1003330d8983SJohannes Doerfert /// Indicate whether the device should setup the device environment. Notice 1004330d8983SJohannes Doerfert /// that returning false in this function will change the behavior of the 1005330d8983SJohannes Doerfert /// setupDeviceEnvironment() function. 
virtual bool shouldSetupDeviceEnvironment() const { return true; }

/// Indicate whether the device should setup the global device memory pool. If
/// false is returned, the value on the device will be uninitialized. The
/// default implementation returns true.
virtual bool shouldSetupDeviceMemoryPool() const { return true; }

/// Indicate whether or not the device should setup the RPC server. This is
/// only necessary for unhosted targets like the GPU. The default
/// implementation returns false.
virtual bool shouldSetupRPCServer() const { return false; }

/// Pointer to the memory manager or nullptr if not available.
MemoryManagerTy *MemoryManager;

/// Environment variables defined by the OpenMP standard.
Int32Envar OMP_TeamLimit;
Int32Envar OMP_NumTeams;
Int32Envar OMP_TeamsThreadLimit;

/// Environment variables defined by the LLVM OpenMP implementation.
Int32Envar OMPX_DebugKind;
UInt32Envar OMPX_SharedMemorySize;
UInt64Envar OMPX_TargetStackSize;
UInt64Envar OMPX_TargetHeapSize;

/// Environment flag to set the minimum number of threads we use for a
/// low-trip count combined loop. Instead of using more threads we increase
/// the outer (block/team) parallelism.
UInt32Envar OMPX_MinThreadsForLowTripCount =
    UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32);

// NOTE(review): presumably controls whether blocks are reused for loops with
// a high trip count (enabled by default) - confirm against the users of this
// variable.
BoolEnvar OMPX_ReuseBlocksForHighTripCount =
    BoolEnvar("LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT", true);

protected:
/// Environment variables defined by the LLVM OpenMP implementation
/// regarding the initial number of streams and events.
UInt32Envar OMPX_InitialNumStreams;
UInt32Envar OMPX_InitialNumEvents;

/// Array of images loaded into the device. Images are automatically
/// deallocated by the allocator.
llvm::SmallVector<DeviceImageTy *> LoadedImages;

/// The identifier of the device within the plugin. Notice this is not a
/// global device id and is not the device id visible to the OpenMP user.
const int32_t DeviceId;

/// The default grid values used for this device.
llvm::omp::GV GridValues;

/// Enumeration used for representing the current state of the peer access
/// between two devices (both under the same plugin).
/// The states can be a) PENDING when the state has not been queried and needs
/// to be queried, b) AVAILABLE when the peer access is available to be used,
/// and c) UNAVAILABLE if the system does not allow it.
enum class PeerAccessState : uint8_t { AVAILABLE, UNAVAILABLE, PENDING };

/// Array of peer access states with the rest of devices. This means that if
/// the device I has an array PeerAccesses with PeerAccesses[J] == AVAILABLE,
/// the device I can access device J's memory directly. However, notice this
/// does not mean that device J can access device I's memory directly.
llvm::SmallVector<PeerAccessState> PeerAccesses;
// NOTE(review): presumably guards accesses to PeerAccesses - confirm at the
// use sites.
std::mutex PeerAccessesLock;

/// Map of host pinned allocations used to optimize device transfers.
PinnedAllocationMapTy PinnedAllocs;

/// A pointer to an RPC server instance attached to this device if present.
/// This is used to run the RPC server during task synchronization.
RPCServerTy *RPCServer;

#ifdef OMPT_SUPPORT
/// OMPT callback functions. One `<Name>_fn` member of type `<Name>_t` is
/// declared per entry of FOREACH_OMPT_DEVICE_EVENT, initialized to nullptr.
#define defineOmptCallback(Name, Type, Code) Name##_t Name##_fn = nullptr;
FOREACH_OMPT_DEVICE_EVENT(defineOmptCallback)
#undef defineOmptCallback

/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif

private:
// Device memory pool and its tracking data, both starting from the literal
// initial values below.
// NOTE(review): field meanings are defined by DeviceMemoryPoolTy and
// DeviceMemoryPoolTrackingTy, which are not visible here.
DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
};

/// Class implementing common functionalities of offload plugins. Each plugin
/// should define the specific plugin class, derive from this generic one, and
/// implement the necessary virtual function members.
struct GenericPluginTy {

  /// Construct a plugin instance. The JIT engine is created for the
  /// architecture \p TA; the global handler, RPC server and record-replay
  /// pointers start out as nullptr.
  GenericPluginTy(Triple::ArchType TA)
      : GlobalHandler(nullptr), JIT(TA), RPCServer(nullptr),
        RecordReplay(nullptr) {}

  virtual ~GenericPluginTy() {}

  /// Initialize the plugin.
Error init();

/// Initialize the plugin and return the number of available devices.
virtual Expected<int32_t> initImpl() = 0;

/// Deinitialize the plugin and release the resources.
Error deinit();
virtual Error deinitImpl() = 0;

/// Create a new device for the underlying plugin.
virtual GenericDeviceTy *createDevice(GenericPluginTy &Plugin,
                                      int32_t DeviceID,
                                      int32_t NumDevices) = 0;

/// Create a new global handler for the underlying plugin.
virtual GenericGlobalHandlerTy *createGlobalHandler() = 0;

/// Get the reference to the device with a certain device id. The id must be
/// valid and the device must already have been created; both conditions are
/// only checked through assertions.
GenericDeviceTy &getDevice(int32_t DeviceId) {
  assert(isValidDeviceId(DeviceId) && "Invalid device id");
  assert(Devices[DeviceId] && "Device is unitialized");

  return *Devices[DeviceId];
}

/// Get the number of active devices.
int32_t getNumDevices() const { return NumDevices; }

/// Get the user device identifier registered for the plugin device
/// \p DeviceId.
1134435aa766SJoseph Huber int32_t getUserId(int32_t DeviceId) const { 1135435aa766SJoseph Huber assert(UserDeviceIds.contains(DeviceId) && "No user-id registered"); 1136435aa766SJoseph Huber return UserDeviceIds.at(DeviceId); 1137435aa766SJoseph Huber } 1138330d8983SJohannes Doerfert 1139330d8983SJohannes Doerfert /// Get the ELF code to recognize the binary image of this plugin. 1140330d8983SJohannes Doerfert virtual uint16_t getMagicElfBits() const = 0; 1141330d8983SJohannes Doerfert 1142330d8983SJohannes Doerfert /// Get the target triple of this plugin. 1143330d8983SJohannes Doerfert virtual Triple::ArchType getTripleArch() const = 0; 1144330d8983SJohannes Doerfert 1145fa9e90f5SJoseph Huber /// Get the constant name identifier for this plugin. 1146fa9e90f5SJoseph Huber virtual const char *getName() const = 0; 1147fa9e90f5SJoseph Huber 1148330d8983SJohannes Doerfert /// Allocate a structure using the internal allocator. 1149330d8983SJohannes Doerfert template <typename Ty> Ty *allocate() { 1150330d8983SJohannes Doerfert return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty))); 1151330d8983SJohannes Doerfert } 1152330d8983SJohannes Doerfert 1153330d8983SJohannes Doerfert /// Get the reference to the global handler of this plugin. 1154330d8983SJohannes Doerfert GenericGlobalHandlerTy &getGlobalHandler() { 1155330d8983SJohannes Doerfert assert(GlobalHandler && "Global handler not initialized"); 1156330d8983SJohannes Doerfert return *GlobalHandler; 1157330d8983SJohannes Doerfert } 1158330d8983SJohannes Doerfert 1159330d8983SJohannes Doerfert /// Get the reference to the JIT used for all devices connected to this 1160330d8983SJohannes Doerfert /// plugin. 1161330d8983SJohannes Doerfert JITEngine &getJIT() { return JIT; } 1162330d8983SJohannes Doerfert 1163330d8983SJohannes Doerfert /// Get a reference to the RPC server used to provide host services. 
1164330d8983SJohannes Doerfert RPCServerTy &getRPCServer() { 1165330d8983SJohannes Doerfert assert(RPCServer && "RPC server not initialized"); 1166330d8983SJohannes Doerfert return *RPCServer; 1167330d8983SJohannes Doerfert } 1168330d8983SJohannes Doerfert 1169f42f57b5SJoseph Huber /// Get a reference to the record and replay interface for the plugin. 1170f42f57b5SJoseph Huber RecordReplayTy &getRecordReplay() { 1171f42f57b5SJoseph Huber assert(RecordReplay && "RR interface not initialized"); 1172f42f57b5SJoseph Huber return *RecordReplay; 1173f42f57b5SJoseph Huber } 1174f42f57b5SJoseph Huber 1175330d8983SJohannes Doerfert /// Initialize a device within the plugin. 1176330d8983SJohannes Doerfert Error initDevice(int32_t DeviceId); 1177330d8983SJohannes Doerfert 1178330d8983SJohannes Doerfert /// Deinitialize a device within the plugin and release its resources. 1179330d8983SJohannes Doerfert Error deinitDevice(int32_t DeviceId); 1180330d8983SJohannes Doerfert 1181330d8983SJohannes Doerfert /// Indicate whether data can be exchanged directly between two devices under 1182330d8983SJohannes Doerfert /// this same plugin. If this function returns true, it's safe to call the 1183330d8983SJohannes Doerfert /// GenericDeviceTy::exchangeData() function on the source device. 1184330d8983SJohannes Doerfert virtual bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) { 1185330d8983SJohannes Doerfert return isValidDeviceId(SrcDeviceId) && isValidDeviceId(DstDeviceId); 1186330d8983SJohannes Doerfert } 1187330d8983SJohannes Doerfert 1188330d8983SJohannes Doerfert /// Top level interface to verify if a given ELF image can be executed on a 1189330d8983SJohannes Doerfert /// given target. Returns true if the \p Image is compatible with the plugin. 
Expected<bool> checkELFImage(StringRef Image) const;

/// Return true if the \p Image can be compiled to run on the platform's
/// target architecture.
Expected<bool> checkBitcodeImage(StringRef Image) const;

/// Indicate if an image is compatible with the plugin devices. Notice that
/// this function may be called before actually initializing the devices. So
/// we could not move this function into GenericDeviceTy.
virtual Expected<bool> isELFCompatible(uint32_t DeviceID,
                                       StringRef Image) const = 0;

protected:
/// Indicate whether a device id is valid, i.e., it is in the range
/// [0, getNumDevices()).
bool isValidDeviceId(int32_t DeviceId) const {
  return (DeviceId >= 0 && DeviceId < getNumDevices());
}

public:
// TODO: This plugin interface needs to be cleaned up.

/// Returns non-zero if the plugin runtime has been initialized.
int32_t is_initialized() const;

/// Returns non-zero if the \p Image is compatible with the plugin. This
/// function does not require the plugin to be initialized before use.
int32_t is_plugin_compatible(__tgt_device_image *Image);

/// Returns non-zero if the \p Image is compatible with the device.
int32_t is_device_compatible(int32_t DeviceId, __tgt_device_image *Image);

/// Returns non-zero if the plugin device has been initialized.
int32_t is_device_initialized(int32_t DeviceId) const;

/// Initialize the device inside of the plugin.
int32_t init_device(int32_t DeviceId);

/// Return the number of devices this plugin can support.
int32_t number_of_devices();

/// Returns non-zero if the data can be exchanged between the two devices.
int32_t is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId);

/// Initializes the record and replay mechanism inside the plugin.
int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
                                 void *VAddr, bool isRecord, bool SaveOutput,
                                 uint64_t &ReqPtrArgOffset);

/// Loads the associated binary into the plugin and returns a handle to it.
int32_t load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
                    __tgt_device_binary *Binary);

/// Allocates memory that is accessible to the given device.
void *data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, int32_t Kind);

/// Deallocates memory on the given device.
int32_t data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind);

/// Locks / pins host memory using the plugin runtime.
int32_t data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
                  void **LockedPtr);

/// Unlocks / unpins host memory using the plugin runtime.
int32_t data_unlock(int32_t DeviceId, void *Ptr);

/// Notify the runtime about a new mapping that has been created outside.
int32_t data_notify_mapped(int32_t DeviceId, void *HstPtr, int64_t Size);

/// Notify the runtime about a mapping that has been deleted.
int32_t data_notify_unmapped(int32_t DeviceId, void *HstPtr);

/// Copy data to the given device.
int32_t data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
                    int64_t Size);

/// Copy data to the given device asynchronously.
int32_t data_submit_async(int32_t DeviceId, void *TgtPtr, void *HstPtr,
                          int64_t Size, __tgt_async_info *AsyncInfoPtr);

/// Copy data from the given device.
int32_t data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
                      int64_t Size);

/// Copy data from the given device asynchronously.
int32_t data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr,
                            int64_t Size, __tgt_async_info *AsyncInfoPtr);

/// Exchange memory addresses between two devices.
int32_t data_exchange(int32_t SrcDeviceId, void *SrcPtr, int32_t DstDeviceId,
                      void *DstPtr, int64_t Size);

/// Exchange memory addresses between two devices asynchronously.
// NOTE(review): DstDeviceId is declared as `int` here while every other
// device id in this interface is `int32_t`; consider unifying it together
// with the out-of-line definition.
int32_t data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
                            int DstDeviceId, void *DstPtr, int64_t Size,
                            __tgt_async_info *AsyncInfo);

/// Begin executing a kernel on the given device.
int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
                      ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs,
                      __tgt_async_info *AsyncInfoPtr);

/// Synchronize an asynchronous queue with the plugin runtime.
int32_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);

/// Query the current state of an asynchronous queue.
int32_t query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);

/// Prints information about the given device supported by the plugin.
void print_device_info(int32_t DeviceId);

/// Creates an event in the given plugin if supported.
int32_t create_event(int32_t DeviceId, void **EventPtr);

/// Records an event that has occurred.
int32_t record_event(int32_t DeviceId, void *EventPtr,
                     __tgt_async_info *AsyncInfoPtr);

/// Wait until an event has occurred.
int32_t wait_event(int32_t DeviceId, void *EventPtr,
                   __tgt_async_info *AsyncInfoPtr);

/// Synchronize execution until an event is done.
int32_t sync_event(int32_t DeviceId, void *EventPtr);

/// Remove the event from the plugin.
int32_t destroy_event(int32_t DeviceId, void *EventPtr);

/// Set the information flag of the plugin to \p NewInfoLevel.
// NOTE(review): description inferred from the function and parameter names;
// confirm against the implementation.
void set_info_flag(uint32_t NewInfoLevel);

/// Creates an asynchronous queue for the given plugin.
int32_t init_async_info(int32_t DeviceId, __tgt_async_info **AsyncInfoPtr);

/// Creates device information to be used for diagnostics.
int32_t init_device_info(int32_t DeviceId, __tgt_device_info *DeviceInfo,
                         const char **ErrStr);

/// Sets the offset into the devices for use by OMPT.
// NOTE(review): presumably this registers \p UserId as the user device
// identifier of the plugin device \p DeviceId (see UserDeviceIds and
// getUserId()) - confirm against the implementation.
int32_t set_device_identifier(int32_t UserId, int32_t DeviceId);

/// Returns whether the plugin can support automatic zero-copy.
int32_t use_auto_zero_copy(int32_t DeviceId);

/// Look up a global symbol in the given binary.
int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
                   const char *Name, void **DevicePtr);

/// Look up a kernel function in the given binary.
int32_t get_function(__tgt_device_binary Binary, const char *Name,
                     void **KernelPtr);

private:
/// Indicates if the platform runtime has been fully initialized.
bool Initialized = false;

/// Number of devices available for the plugin.
int32_t NumDevices = 0;

/// Map of plugin device identifiers to the user device identifier. Queried
/// by getUserId().
llvm::DenseMap<int32_t, int32_t> UserDeviceIds;

/// Array of pointers to the devices. Initially, they are all set to nullptr.
/// Once a device is initialized, the pointer is stored in the position given
/// by its device id. A position with nullptr means that the corresponding
/// device was not initialized yet.
llvm::SmallVector<GenericDeviceTy *> Devices;

/// Pointer to the global handler for this plugin.
GenericGlobalHandlerTy *GlobalHandler;

/// Internal allocator for different structures.
BumpPtrAllocator Allocator;

/// The JIT engine shared by all devices connected to this plugin.
JITEngine JIT;

/// The interface between the plugin and the GPU for host services.
RPCServerTy *RPCServer;

/// The record and replay interface of the plugin (see getRecordReplay()).
RecordReplayTy *RecordReplay;
};

namespace Plugin {
/// Create a success error. This is the same as calling Error::success(), but
/// it is recommended to use this one for consistency with Plugin::error() and
/// Plugin::check().
static inline Error success() { return Error::success(); }

/// Create a string error. \p ErrFmt is a printf-style format string and
/// \p Args are the values for its format specifiers; both are forwarded to
/// createStringError().
template <typename... ArgsTy>
static Error error(const char *ErrFmt, ArgsTy... Args) {
  return createStringError(inconvertibleErrorCode(), ErrFmt, Args...);
}

/// Check the plugin-specific error code and return an error or success
/// accordingly. In case of an error, create a string error with the error
/// description.
/// The ErrFmt should follow the format:
///     "Error in <function name>[<optional info>]: %s"
/// The last format specifier "%s" is mandatory and will be used to place the
/// error code's description. Notice this function should be only called from
/// the plugin-specific code.
/// TODO: Refactor this, must be defined individually by each plugin.
template <typename... ArgsTy>
static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args);
} // namespace Plugin

/// Auxiliary interface class for GenericDeviceResourceManagerTy. This class
/// acts as a reference to a device resource, such as a stream, and requires
/// some basic functions to be implemented. The derived class should define an
/// empty constructor that creates an empty and invalid resource reference. Do
/// not create a new resource on the ctor, but on the create() function instead.
///
/// The derived class should also define the type HandleTy as the underlying
/// resource handle type. For instance, in a CUDA stream it would be:
///   using HandleTy = CUstream;
struct GenericDeviceResourceRef {
  /// Create a new resource and store a reference to it.
  virtual Error create(GenericDeviceTy &Device) = 0;

  /// Destroy and release the resources pointed by the reference.
virtual Error destroy(GenericDeviceTy &Device) = 0;

protected:
// The destructor is protected and non-virtual, so objects cannot be destroyed
// through a GenericDeviceResourceRef pointer or reference.
~GenericDeviceResourceRef() = default;
};

/// Class that implements a resource pool belonging to a device. This class
/// operates with references to the actual resources. These references must
/// derive from the GenericDeviceResourceRef class and implement the create
/// and destroy virtual functions.
template <typename ResourceRef> class GenericDeviceResourceManagerTy {
  using ResourcePoolTy = GenericDeviceResourceManagerTy<ResourceRef>;
  using ResourceHandleTy = typename ResourceRef::HandleTy;

public:
  /// Create an empty resource pool for a specific device. No resource is
  /// handed out initially (NextAvailable is zero).
  GenericDeviceResourceManagerTy(GenericDeviceTy &Device)
      : Device(Device), NextAvailable(0) {}

  /// Destroy the resource pool. At this point, the deinit() function should
  /// already have been executed so the resource pool should be empty.
  virtual ~GenericDeviceResourceManagerTy() {
    assert(ResourcePool.empty() && "Resource pool not empty");
  }

  /// Initialize the resource pool.
1437330d8983SJohannes Doerfert Error init(uint32_t InitialSize) { 1438330d8983SJohannes Doerfert assert(ResourcePool.empty() && "Resource pool already initialized"); 1439330d8983SJohannes Doerfert return ResourcePoolTy::resizeResourcePool(InitialSize); 1440330d8983SJohannes Doerfert } 1441330d8983SJohannes Doerfert 1442330d8983SJohannes Doerfert /// Deinitialize the resource pool and delete all resources. This function 1443330d8983SJohannes Doerfert /// must be called before the destructor. 1444330d8983SJohannes Doerfert virtual Error deinit() { 1445330d8983SJohannes Doerfert if (NextAvailable) 1446330d8983SJohannes Doerfert DP("Missing %d resources to be returned\n", NextAvailable); 1447330d8983SJohannes Doerfert 1448330d8983SJohannes Doerfert // TODO: This prevents a bug on libomptarget to make the plugins fail. There 1449330d8983SJohannes Doerfert // may be some resources not returned. Do not destroy these ones. 1450330d8983SJohannes Doerfert if (auto Err = ResourcePoolTy::resizeResourcePool(NextAvailable)) 1451330d8983SJohannes Doerfert return Err; 1452330d8983SJohannes Doerfert 1453330d8983SJohannes Doerfert ResourcePool.clear(); 1454330d8983SJohannes Doerfert 1455330d8983SJohannes Doerfert return Plugin::success(); 1456330d8983SJohannes Doerfert } 1457330d8983SJohannes Doerfert 1458330d8983SJohannes Doerfert /// Get a resource from the pool or create new ones. If the function 1459330d8983SJohannes Doerfert /// succeeds, the handle to the resource is saved in \p Handle. 1460330d8983SJohannes Doerfert virtual Error getResource(ResourceHandleTy &Handle) { 1461330d8983SJohannes Doerfert // Get a resource with an empty resource processor. 1462330d8983SJohannes Doerfert return getResourcesImpl(1, &Handle, 1463330d8983SJohannes Doerfert [](ResourceHandleTy) { return Plugin::success(); }); 1464330d8983SJohannes Doerfert } 1465330d8983SJohannes Doerfert 1466330d8983SJohannes Doerfert /// Get multiple resources from the pool or create new ones. 
If the function 1467330d8983SJohannes Doerfert /// succeeds, the handles to the resources are saved in \p Handles. 1468330d8983SJohannes Doerfert virtual Error getResources(uint32_t Num, ResourceHandleTy *Handles) { 1469330d8983SJohannes Doerfert // Get resources with an empty resource processor. 1470330d8983SJohannes Doerfert return getResourcesImpl(Num, Handles, 1471330d8983SJohannes Doerfert [](ResourceHandleTy) { return Plugin::success(); }); 1472330d8983SJohannes Doerfert } 1473330d8983SJohannes Doerfert 1474330d8983SJohannes Doerfert /// Return resource to the pool. 1475330d8983SJohannes Doerfert virtual Error returnResource(ResourceHandleTy Handle) { 1476330d8983SJohannes Doerfert // Return a resource with an empty resource processor. 1477330d8983SJohannes Doerfert return returnResourceImpl( 1478330d8983SJohannes Doerfert Handle, [](ResourceHandleTy) { return Plugin::success(); }); 1479330d8983SJohannes Doerfert } 1480330d8983SJohannes Doerfert 1481330d8983SJohannes Doerfert protected: 1482330d8983SJohannes Doerfert /// Get multiple resources from the pool or create new ones. If the function 1483330d8983SJohannes Doerfert /// succeeds, the handles to the resources are saved in \p Handles. Also 1484330d8983SJohannes Doerfert /// process each of the obtained resources with \p Processor. 1485330d8983SJohannes Doerfert template <typename FuncTy> 1486330d8983SJohannes Doerfert Error getResourcesImpl(uint32_t Num, ResourceHandleTy *Handles, 1487330d8983SJohannes Doerfert FuncTy Processor) { 1488330d8983SJohannes Doerfert const std::lock_guard<std::mutex> Lock(Mutex); 1489330d8983SJohannes Doerfert 1490330d8983SJohannes Doerfert assert(NextAvailable <= ResourcePool.size() && 1491330d8983SJohannes Doerfert "Resource pool is corrupted"); 1492330d8983SJohannes Doerfert 1493330d8983SJohannes Doerfert if (NextAvailable + Num > ResourcePool.size()) 1494330d8983SJohannes Doerfert // Double the resource pool or resize it to provide the requested ones. 
1495330d8983SJohannes Doerfert if (auto Err = ResourcePoolTy::resizeResourcePool( 1496330d8983SJohannes Doerfert std::max(NextAvailable * 2, NextAvailable + Num))) 1497330d8983SJohannes Doerfert return Err; 1498330d8983SJohannes Doerfert 1499330d8983SJohannes Doerfert // Save the handles in the output array parameter. 1500330d8983SJohannes Doerfert for (uint32_t r = 0; r < Num; ++r) 1501330d8983SJohannes Doerfert Handles[r] = ResourcePool[NextAvailable + r]; 1502330d8983SJohannes Doerfert 1503330d8983SJohannes Doerfert // Process all obtained resources. 1504330d8983SJohannes Doerfert for (uint32_t r = 0; r < Num; ++r) 1505330d8983SJohannes Doerfert if (auto Err = Processor(Handles[r])) 1506330d8983SJohannes Doerfert return Err; 1507330d8983SJohannes Doerfert 1508330d8983SJohannes Doerfert NextAvailable += Num; 1509330d8983SJohannes Doerfert 1510330d8983SJohannes Doerfert return Plugin::success(); 1511330d8983SJohannes Doerfert } 1512330d8983SJohannes Doerfert 1513330d8983SJohannes Doerfert /// Return resource to the pool and process the resource with \p Processor. 1514330d8983SJohannes Doerfert template <typename FuncTy> 1515330d8983SJohannes Doerfert Error returnResourceImpl(ResourceHandleTy Handle, FuncTy Processor) { 1516330d8983SJohannes Doerfert const std::lock_guard<std::mutex> Lock(Mutex); 1517330d8983SJohannes Doerfert 1518330d8983SJohannes Doerfert // Process the returned resource. 
1519330d8983SJohannes Doerfert if (auto Err = Processor(Handle)) 1520330d8983SJohannes Doerfert return Err; 1521330d8983SJohannes Doerfert 1522330d8983SJohannes Doerfert assert(NextAvailable > 0 && "Resource pool is corrupted"); 1523330d8983SJohannes Doerfert ResourcePool[--NextAvailable] = Handle; 1524330d8983SJohannes Doerfert 1525330d8983SJohannes Doerfert return Plugin::success(); 1526330d8983SJohannes Doerfert } 1527330d8983SJohannes Doerfert 1528330d8983SJohannes Doerfert protected: 1529330d8983SJohannes Doerfert /// The resources between \p OldSize and \p NewSize need to be created or 1530330d8983SJohannes Doerfert /// destroyed. The mutex is locked when this function is called. 1531330d8983SJohannes Doerfert Error resizeResourcePoolImpl(uint32_t OldSize, uint32_t NewSize) { 1532330d8983SJohannes Doerfert assert(OldSize != NewSize && "Resizing to the same size"); 1533330d8983SJohannes Doerfert 1534330d8983SJohannes Doerfert if (auto Err = Device.setContext()) 1535330d8983SJohannes Doerfert return Err; 1536330d8983SJohannes Doerfert 1537330d8983SJohannes Doerfert if (OldSize < NewSize) { 1538330d8983SJohannes Doerfert // Create new resources. 1539330d8983SJohannes Doerfert for (uint32_t I = OldSize; I < NewSize; ++I) { 1540330d8983SJohannes Doerfert if (auto Err = ResourcePool[I].create(Device)) 1541330d8983SJohannes Doerfert return Err; 1542330d8983SJohannes Doerfert } 1543330d8983SJohannes Doerfert } else { 1544330d8983SJohannes Doerfert // Destroy the obsolete resources. 1545330d8983SJohannes Doerfert for (uint32_t I = NewSize; I < OldSize; ++I) { 1546330d8983SJohannes Doerfert if (auto Err = ResourcePool[I].destroy(Device)) 1547330d8983SJohannes Doerfert return Err; 1548330d8983SJohannes Doerfert } 1549330d8983SJohannes Doerfert } 1550330d8983SJohannes Doerfert return Plugin::success(); 1551330d8983SJohannes Doerfert } 1552330d8983SJohannes Doerfert 1553330d8983SJohannes Doerfert /// Increase or decrease the number of resources. 
This function should 1554330d8983SJohannes Doerfert /// be called with the mutex acquired. 1555330d8983SJohannes Doerfert Error resizeResourcePool(uint32_t NewSize) { 1556330d8983SJohannes Doerfert uint32_t OldSize = ResourcePool.size(); 1557330d8983SJohannes Doerfert 1558330d8983SJohannes Doerfert // Nothing to do. 1559330d8983SJohannes Doerfert if (OldSize == NewSize) 1560330d8983SJohannes Doerfert return Plugin::success(); 1561330d8983SJohannes Doerfert 1562330d8983SJohannes Doerfert if (OldSize < NewSize) { 1563330d8983SJohannes Doerfert // Increase the number of resources. 1564330d8983SJohannes Doerfert ResourcePool.resize(NewSize); 1565330d8983SJohannes Doerfert return ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize); 1566330d8983SJohannes Doerfert } 1567330d8983SJohannes Doerfert 1568330d8983SJohannes Doerfert // Decrease the number of resources otherwise. 1569330d8983SJohannes Doerfert auto Err = ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize); 1570330d8983SJohannes Doerfert ResourcePool.resize(NewSize); 1571330d8983SJohannes Doerfert 1572330d8983SJohannes Doerfert return Err; 1573330d8983SJohannes Doerfert } 1574330d8983SJohannes Doerfert 1575330d8983SJohannes Doerfert /// The device to which the resources belong 1576330d8983SJohannes Doerfert GenericDeviceTy &Device; 1577330d8983SJohannes Doerfert 1578330d8983SJohannes Doerfert /// Mutex for the resource pool. 1579330d8983SJohannes Doerfert std::mutex Mutex; 1580330d8983SJohannes Doerfert 1581330d8983SJohannes Doerfert /// The next available resource in the pool. 1582330d8983SJohannes Doerfert uint32_t NextAvailable; 1583330d8983SJohannes Doerfert 1584330d8983SJohannes Doerfert /// The actual resource pool. 
1585330d8983SJohannes Doerfert std::deque<ResourceRef> ResourcePool; 1586330d8983SJohannes Doerfert }; 1587330d8983SJohannes Doerfert 1588330d8983SJohannes Doerfert } // namespace plugin 1589330d8983SJohannes Doerfert } // namespace target 1590330d8983SJohannes Doerfert } // namespace omp 1591330d8983SJohannes Doerfert } // namespace llvm 1592330d8983SJohannes Doerfert 1593330d8983SJohannes Doerfert #endif // OPENMP_LIBOMPTARGET_PLUGINS_COMMON_PLUGININTERFACE_H 1594