1330d8983SJohannes Doerfert //===- PluginInterface.cpp - Target independent plugin device interface ---===// 2330d8983SJohannes Doerfert // 3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information. 5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6330d8983SJohannes Doerfert // 7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===// 8330d8983SJohannes Doerfert // 9330d8983SJohannes Doerfert //===----------------------------------------------------------------------===// 10330d8983SJohannes Doerfert 11330d8983SJohannes Doerfert #include "PluginInterface.h" 12330d8983SJohannes Doerfert 13330d8983SJohannes Doerfert #include "Shared/APITypes.h" 14330d8983SJohannes Doerfert #include "Shared/Debug.h" 15330d8983SJohannes Doerfert #include "Shared/Environment.h" 16330d8983SJohannes Doerfert 17c95abe94SJohannes Doerfert #include "ErrorReporting.h" 18330d8983SJohannes Doerfert #include "GlobalHandler.h" 19330d8983SJohannes Doerfert #include "JIT.h" 2008533a3eSJohannes Doerfert #include "Shared/Utils.h" 21330d8983SJohannes Doerfert #include "Utils/ELF.h" 22330d8983SJohannes Doerfert #include "omptarget.h" 23330d8983SJohannes Doerfert 24330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT 25330d8983SJohannes Doerfert #include "OpenMP/OMPT/Callback.h" 26330d8983SJohannes Doerfert #include "omp-tools.h" 27330d8983SJohannes Doerfert #endif 28330d8983SJohannes Doerfert 2921f3a609SJoseph Huber #include "llvm/Bitcode/BitcodeReader.h" 30330d8983SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h" 31330d8983SJohannes Doerfert #include "llvm/Support/Error.h" 32330d8983SJohannes Doerfert #include "llvm/Support/JSON.h" 33330d8983SJohannes Doerfert #include "llvm/Support/MathExtras.h" 34330d8983SJohannes Doerfert #include "llvm/Support/MemoryBuffer.h" 35c95abe94SJohannes Doerfert #include "llvm/Support/Signals.h" 36c95abe94SJohannes Doerfert #include "llvm/Support/raw_ostream.h" 37330d8983SJohannes Doerfert 38330d8983SJohannes Doerfert #include <cstdint> 39330d8983SJohannes Doerfert #include <limits> 40330d8983SJohannes Doerfert 41330d8983SJohannes Doerfert using namespace llvm; 42330d8983SJohannes Doerfert using namespace omp; 43330d8983SJohannes Doerfert using namespace target; 44330d8983SJohannes Doerfert using namespace plugin; 45330d8983SJohannes Doerfert 46330d8983SJohannes Doerfert // TODO: Fix any thread safety issues for multi-threaded kernel recording. 47f42f57b5SJoseph Huber namespace llvm::omp::target::plugin { 48330d8983SJohannes Doerfert struct RecordReplayTy { 49330d8983SJohannes Doerfert 50330d8983SJohannes Doerfert // Describes the state of the record replay mechanism. 51330d8983SJohannes Doerfert enum RRStatusTy { RRDeactivated = 0, RRRecording, RRReplaying }; 52330d8983SJohannes Doerfert 53330d8983SJohannes Doerfert private: 54330d8983SJohannes Doerfert // Memory pointers for recording, replaying memory. 55330d8983SJohannes Doerfert void *MemoryStart = nullptr; 56330d8983SJohannes Doerfert void *MemoryPtr = nullptr; 57330d8983SJohannes Doerfert size_t MemorySize = 0; 58330d8983SJohannes Doerfert size_t TotalSize = 0; 59330d8983SJohannes Doerfert GenericDeviceTy *Device = nullptr; 60330d8983SJohannes Doerfert std::mutex AllocationLock; 61330d8983SJohannes Doerfert 62330d8983SJohannes Doerfert RRStatusTy Status = RRDeactivated; 63330d8983SJohannes Doerfert bool ReplaySaveOutput = false; 64330d8983SJohannes Doerfert bool UsedVAMap = false; 65330d8983SJohannes Doerfert uintptr_t MemoryOffset = 0; 66330d8983SJohannes Doerfert 67330d8983SJohannes Doerfert // A list of all globals mapped to the device. 68330d8983SJohannes Doerfert struct GlobalEntry { 69330d8983SJohannes Doerfert const char *Name; 70330d8983SJohannes Doerfert uint64_t Size; 71330d8983SJohannes Doerfert void *Addr; 72330d8983SJohannes Doerfert }; 73330d8983SJohannes Doerfert llvm::SmallVector<GlobalEntry> GlobalEntries{}; 74330d8983SJohannes Doerfert 75330d8983SJohannes Doerfert void *suggestAddress(uint64_t MaxMemoryAllocation) { 76330d8983SJohannes Doerfert // Get a valid pointer address for this system 77330d8983SJohannes Doerfert void *Addr = 78330d8983SJohannes Doerfert Device->allocate(1024, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT); 79330d8983SJohannes Doerfert Device->free(Addr); 80330d8983SJohannes Doerfert // Align Address to MaxMemoryAllocation 8108533a3eSJohannes Doerfert Addr = (void *)utils::alignPtr((Addr), MaxMemoryAllocation); 82330d8983SJohannes Doerfert return Addr; 83330d8983SJohannes Doerfert } 84330d8983SJohannes Doerfert 85330d8983SJohannes Doerfert Error preAllocateVAMemory(uint64_t MaxMemoryAllocation, void *VAddr) { 86330d8983SJohannes Doerfert size_t ASize = MaxMemoryAllocation; 87330d8983SJohannes Doerfert 88330d8983SJohannes Doerfert if (!VAddr && isRecording()) 89330d8983SJohannes Doerfert VAddr = suggestAddress(MaxMemoryAllocation); 90330d8983SJohannes Doerfert 91330d8983SJohannes Doerfert DP("Request %ld bytes allocated at %p\n", MaxMemoryAllocation, VAddr); 92330d8983SJohannes Doerfert 93330d8983SJohannes Doerfert if (auto Err = Device->memoryVAMap(&MemoryStart, VAddr, &ASize)) 94330d8983SJohannes Doerfert return Err; 95330d8983SJohannes Doerfert 96330d8983SJohannes Doerfert if (isReplaying() && VAddr != MemoryStart) { 97330d8983SJohannes Doerfert return Plugin::error("Record-Replay cannot assign the" 98330d8983SJohannes Doerfert "requested recorded address (%p, %p)", 99330d8983SJohannes Doerfert VAddr, MemoryStart); 100330d8983SJohannes Doerfert } 101330d8983SJohannes Doerfert 102330d8983SJohannes Doerfert INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(), 103330d8983SJohannes Doerfert "Allocated %" PRIu64 " bytes at %p for replay.\n", ASize, MemoryStart); 104330d8983SJohannes Doerfert 105330d8983SJohannes Doerfert MemoryPtr = MemoryStart; 106330d8983SJohannes Doerfert MemorySize = 0; 107330d8983SJohannes Doerfert TotalSize = ASize; 108330d8983SJohannes Doerfert UsedVAMap = true; 109330d8983SJohannes Doerfert return Plugin::success(); 110330d8983SJohannes Doerfert } 111330d8983SJohannes Doerfert 112330d8983SJohannes Doerfert Error preAllocateHeuristic(uint64_t MaxMemoryAllocation, 113330d8983SJohannes Doerfert uint64_t RequiredMemoryAllocation, void *VAddr) { 114330d8983SJohannes Doerfert const size_t MAX_MEMORY_ALLOCATION = MaxMemoryAllocation; 115330d8983SJohannes Doerfert constexpr size_t STEP = 1024 * 1024 * 1024ULL; 116330d8983SJohannes Doerfert MemoryStart = nullptr; 117330d8983SJohannes Doerfert for (TotalSize = MAX_MEMORY_ALLOCATION; TotalSize > 0; TotalSize -= STEP) { 118330d8983SJohannes Doerfert MemoryStart = 119330d8983SJohannes Doerfert Device->allocate(TotalSize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT); 120330d8983SJohannes Doerfert if (MemoryStart) 121330d8983SJohannes Doerfert break; 122330d8983SJohannes Doerfert } 123330d8983SJohannes Doerfert if (!MemoryStart) 124330d8983SJohannes Doerfert return Plugin::error("Allocating record/replay memory"); 125330d8983SJohannes Doerfert 126330d8983SJohannes Doerfert if (VAddr && VAddr != MemoryStart) 127330d8983SJohannes Doerfert MemoryOffset = uintptr_t(VAddr) - uintptr_t(MemoryStart); 128330d8983SJohannes Doerfert 129330d8983SJohannes Doerfert MemoryPtr = MemoryStart; 130330d8983SJohannes Doerfert MemorySize = 0; 131330d8983SJohannes Doerfert 132330d8983SJohannes Doerfert // Check if we need adjustment. 133330d8983SJohannes Doerfert if (MemoryOffset > 0 && 134330d8983SJohannes Doerfert TotalSize >= RequiredMemoryAllocation + MemoryOffset) { 135330d8983SJohannes Doerfert // If we are off but "before" the required address and with enough space, 136330d8983SJohannes Doerfert // we just "allocate" the offset to match the required address. 137330d8983SJohannes Doerfert MemoryPtr = (char *)MemoryPtr + MemoryOffset; 138330d8983SJohannes Doerfert MemorySize += MemoryOffset; 139330d8983SJohannes Doerfert MemoryOffset = 0; 140330d8983SJohannes Doerfert assert(MemoryPtr == VAddr && "Expected offset adjustment to work"); 141330d8983SJohannes Doerfert } else if (MemoryOffset) { 142330d8983SJohannes Doerfert // If we are off and in a situation we cannot just "waste" memory to force 143330d8983SJohannes Doerfert // a match, we hope adjusting the arguments is sufficient. 144330d8983SJohannes Doerfert REPORT( 145330d8983SJohannes Doerfert "WARNING Failed to allocate replay memory at required location %p, " 146330d8983SJohannes Doerfert "got %p, trying to offset argument pointers by %" PRIi64 "\n", 147330d8983SJohannes Doerfert VAddr, MemoryStart, MemoryOffset); 148330d8983SJohannes Doerfert } 149330d8983SJohannes Doerfert 150330d8983SJohannes Doerfert INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(), 151330d8983SJohannes Doerfert "Allocated %" PRIu64 " bytes at %p for replay.\n", TotalSize, 152330d8983SJohannes Doerfert MemoryStart); 153330d8983SJohannes Doerfert 154330d8983SJohannes Doerfert return Plugin::success(); 155330d8983SJohannes Doerfert } 156330d8983SJohannes Doerfert 157330d8983SJohannes Doerfert Error preallocateDeviceMemory(uint64_t DeviceMemorySize, void *ReqVAddr) { 158330d8983SJohannes Doerfert if (Device->supportVAManagement()) { 159330d8983SJohannes Doerfert auto Err = preAllocateVAMemory(DeviceMemorySize, ReqVAddr); 160330d8983SJohannes Doerfert if (Err) { 161330d8983SJohannes Doerfert REPORT("WARNING VA mapping failed, fallback to heuristic: " 162330d8983SJohannes Doerfert "(Error: %s)\n", 163330d8983SJohannes Doerfert toString(std::move(Err)).data()); 164330d8983SJohannes Doerfert } 165330d8983SJohannes Doerfert } 166330d8983SJohannes Doerfert 167330d8983SJohannes Doerfert uint64_t DevMemSize; 168330d8983SJohannes Doerfert if (Device->getDeviceMemorySize(DevMemSize)) 169330d8983SJohannes Doerfert return Plugin::error("Cannot determine Device Memory Size"); 170330d8983SJohannes Doerfert 171330d8983SJohannes Doerfert return preAllocateHeuristic(DevMemSize, DeviceMemorySize, ReqVAddr); 172330d8983SJohannes Doerfert } 173330d8983SJohannes Doerfert 174330d8983SJohannes Doerfert void dumpDeviceMemory(StringRef Filename) { 175330d8983SJohannes Doerfert ErrorOr<std::unique_ptr<WritableMemoryBuffer>> DeviceMemoryMB = 176330d8983SJohannes Doerfert WritableMemoryBuffer::getNewUninitMemBuffer(MemorySize); 177330d8983SJohannes Doerfert if (!DeviceMemoryMB) 178330d8983SJohannes Doerfert report_fatal_error("Error creating MemoryBuffer for device memory"); 179330d8983SJohannes Doerfert 180330d8983SJohannes Doerfert auto Err = Device->dataRetrieve(DeviceMemoryMB.get()->getBufferStart(), 181330d8983SJohannes Doerfert MemoryStart, MemorySize, nullptr); 182330d8983SJohannes Doerfert if (Err) 183330d8983SJohannes Doerfert report_fatal_error("Error retrieving data for target pointer"); 184330d8983SJohannes Doerfert 185330d8983SJohannes Doerfert StringRef DeviceMemory(DeviceMemoryMB.get()->getBufferStart(), MemorySize); 186330d8983SJohannes Doerfert std::error_code EC; 187330d8983SJohannes Doerfert raw_fd_ostream OS(Filename, EC); 188330d8983SJohannes Doerfert if (EC) 189330d8983SJohannes Doerfert report_fatal_error("Error dumping memory to file " + Filename + " :" + 190330d8983SJohannes Doerfert EC.message()); 191330d8983SJohannes Doerfert OS << DeviceMemory; 192330d8983SJohannes Doerfert OS.close(); 193330d8983SJohannes Doerfert } 194330d8983SJohannes Doerfert 195330d8983SJohannes Doerfert public: 196330d8983SJohannes Doerfert bool isRecording() const { return Status == RRStatusTy::RRRecording; } 197330d8983SJohannes Doerfert bool isReplaying() const { return Status == RRStatusTy::RRReplaying; } 198330d8983SJohannes Doerfert bool isRecordingOrReplaying() const { 199330d8983SJohannes Doerfert return (Status != RRStatusTy::RRDeactivated); 200330d8983SJohannes Doerfert } 201330d8983SJohannes Doerfert void setStatus(RRStatusTy Status) { this->Status = Status; } 202330d8983SJohannes Doerfert bool isSaveOutputEnabled() const { return ReplaySaveOutput; } 203330d8983SJohannes Doerfert void addEntry(const char *Name, uint64_t Size, void *Addr) { 204330d8983SJohannes Doerfert GlobalEntries.emplace_back(GlobalEntry{Name, Size, Addr}); 205330d8983SJohannes Doerfert } 206330d8983SJohannes Doerfert 207330d8983SJohannes Doerfert void saveImage(const char *Name, const DeviceImageTy &Image) { 208330d8983SJohannes Doerfert SmallString<128> ImageName = {Name, ".image"}; 209330d8983SJohannes Doerfert std::error_code EC; 210330d8983SJohannes Doerfert raw_fd_ostream OS(ImageName, EC); 211330d8983SJohannes Doerfert if (EC) 212330d8983SJohannes Doerfert report_fatal_error("Error saving image : " + StringRef(EC.message())); 213330d8983SJohannes Doerfert if (const auto *TgtImageBitcode = Image.getTgtImageBitcode()) { 21408533a3eSJohannes Doerfert size_t Size = utils::getPtrDiff(TgtImageBitcode->ImageEnd, 21508533a3eSJohannes Doerfert TgtImageBitcode->ImageStart); 216330d8983SJohannes Doerfert MemoryBufferRef MBR = MemoryBufferRef( 217330d8983SJohannes Doerfert StringRef((const char *)TgtImageBitcode->ImageStart, Size), ""); 218330d8983SJohannes Doerfert OS << MBR.getBuffer(); 219330d8983SJohannes Doerfert } else { 220330d8983SJohannes Doerfert OS << Image.getMemoryBuffer().getBuffer(); 221330d8983SJohannes Doerfert } 222330d8983SJohannes Doerfert OS.close(); 223330d8983SJohannes Doerfert } 224330d8983SJohannes Doerfert 225330d8983SJohannes Doerfert void dumpGlobals(StringRef Filename, DeviceImageTy &Image) { 226330d8983SJohannes Doerfert int32_t Size = 0; 227330d8983SJohannes Doerfert 228330d8983SJohannes Doerfert for (auto &OffloadEntry : GlobalEntries) { 229330d8983SJohannes Doerfert if (!OffloadEntry.Size) 230330d8983SJohannes Doerfert continue; 231330d8983SJohannes Doerfert // Get the total size of the string and entry including the null byte. 232330d8983SJohannes Doerfert Size += std::strlen(OffloadEntry.Name) + 1 + sizeof(uint32_t) + 233330d8983SJohannes Doerfert OffloadEntry.Size; 234330d8983SJohannes Doerfert } 235330d8983SJohannes Doerfert 236330d8983SJohannes Doerfert ErrorOr<std::unique_ptr<WritableMemoryBuffer>> GlobalsMB = 237330d8983SJohannes Doerfert WritableMemoryBuffer::getNewUninitMemBuffer(Size); 238330d8983SJohannes Doerfert if (!GlobalsMB) 239330d8983SJohannes Doerfert report_fatal_error("Error creating MemoryBuffer for globals memory"); 240330d8983SJohannes Doerfert 241330d8983SJohannes Doerfert void *BufferPtr = GlobalsMB.get()->getBufferStart(); 242330d8983SJohannes Doerfert for (auto &OffloadEntry : GlobalEntries) { 243330d8983SJohannes Doerfert if (!OffloadEntry.Size) 244330d8983SJohannes Doerfert continue; 245330d8983SJohannes Doerfert 246330d8983SJohannes Doerfert int32_t NameLength = std::strlen(OffloadEntry.Name) + 1; 247330d8983SJohannes Doerfert memcpy(BufferPtr, OffloadEntry.Name, NameLength); 24808533a3eSJohannes Doerfert BufferPtr = utils::advancePtr(BufferPtr, NameLength); 249330d8983SJohannes Doerfert 250330d8983SJohannes Doerfert *((uint32_t *)(BufferPtr)) = OffloadEntry.Size; 25108533a3eSJohannes Doerfert BufferPtr = utils::advancePtr(BufferPtr, sizeof(uint32_t)); 252330d8983SJohannes Doerfert 253330d8983SJohannes Doerfert auto Err = Plugin::success(); 254330d8983SJohannes Doerfert { 255330d8983SJohannes Doerfert if (auto Err = Device->dataRetrieve(BufferPtr, OffloadEntry.Addr, 256330d8983SJohannes Doerfert OffloadEntry.Size, nullptr)) 257330d8983SJohannes Doerfert report_fatal_error("Error retrieving data for global"); 258330d8983SJohannes Doerfert } 259330d8983SJohannes Doerfert if (Err) 260330d8983SJohannes Doerfert report_fatal_error("Error retrieving data for global"); 26108533a3eSJohannes Doerfert BufferPtr = utils::advancePtr(BufferPtr, OffloadEntry.Size); 262330d8983SJohannes Doerfert } 263330d8983SJohannes Doerfert assert(BufferPtr == GlobalsMB->get()->getBufferEnd() && 264330d8983SJohannes Doerfert "Buffer over/under-filled."); 26508533a3eSJohannes Doerfert assert(Size == utils::getPtrDiff(BufferPtr, 26608533a3eSJohannes Doerfert GlobalsMB->get()->getBufferStart()) && 267330d8983SJohannes Doerfert "Buffer size mismatch"); 268330d8983SJohannes Doerfert 269330d8983SJohannes Doerfert StringRef GlobalsMemory(GlobalsMB.get()->getBufferStart(), Size); 270330d8983SJohannes Doerfert std::error_code EC; 271330d8983SJohannes Doerfert raw_fd_ostream OS(Filename, EC); 272330d8983SJohannes Doerfert OS << GlobalsMemory; 273330d8983SJohannes Doerfert OS.close(); 274330d8983SJohannes Doerfert } 275330d8983SJohannes Doerfert 27654b5c76dSJohannes Doerfert void saveKernelDescr(const char *Name, KernelLaunchParamsTy LaunchParams, 27754b5c76dSJohannes Doerfert int32_t NumArgs, uint64_t NumTeamsClause, 27854b5c76dSJohannes Doerfert uint32_t ThreadLimitClause, uint64_t LoopTripCount) { 279330d8983SJohannes Doerfert json::Object JsonKernelInfo; 280330d8983SJohannes Doerfert JsonKernelInfo["Name"] = Name; 281330d8983SJohannes Doerfert JsonKernelInfo["NumArgs"] = NumArgs; 282330d8983SJohannes Doerfert JsonKernelInfo["NumTeamsClause"] = NumTeamsClause; 283330d8983SJohannes Doerfert JsonKernelInfo["ThreadLimitClause"] = ThreadLimitClause; 284330d8983SJohannes Doerfert JsonKernelInfo["LoopTripCount"] = LoopTripCount; 285330d8983SJohannes Doerfert JsonKernelInfo["DeviceMemorySize"] = MemorySize; 286330d8983SJohannes Doerfert JsonKernelInfo["DeviceId"] = Device->getDeviceId(); 287330d8983SJohannes Doerfert JsonKernelInfo["BumpAllocVAStart"] = (intptr_t)MemoryStart; 288330d8983SJohannes Doerfert 289330d8983SJohannes Doerfert json::Array JsonArgPtrs; 290330d8983SJohannes Doerfert for (int I = 0; I < NumArgs; ++I) 29154b5c76dSJohannes Doerfert JsonArgPtrs.push_back((intptr_t)LaunchParams.Ptrs[I]); 292330d8983SJohannes Doerfert JsonKernelInfo["ArgPtrs"] = json::Value(std::move(JsonArgPtrs)); 293330d8983SJohannes Doerfert 294330d8983SJohannes Doerfert json::Array JsonArgOffsets; 295330d8983SJohannes Doerfert for (int I = 0; I < NumArgs; ++I) 296330d8983SJohannes Doerfert JsonArgOffsets.push_back(0); 297330d8983SJohannes Doerfert JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets)); 298330d8983SJohannes Doerfert 299330d8983SJohannes Doerfert SmallString<128> JsonFilename = {Name, ".json"}; 300330d8983SJohannes Doerfert std::error_code EC; 301330d8983SJohannes Doerfert raw_fd_ostream JsonOS(JsonFilename.str(), EC); 302330d8983SJohannes Doerfert if (EC) 303330d8983SJohannes Doerfert report_fatal_error("Error saving kernel json file : " + 304330d8983SJohannes Doerfert StringRef(EC.message())); 305330d8983SJohannes Doerfert JsonOS << json::Value(std::move(JsonKernelInfo)); 306330d8983SJohannes Doerfert JsonOS.close(); 307330d8983SJohannes Doerfert } 308330d8983SJohannes Doerfert 309330d8983SJohannes Doerfert void saveKernelInput(const char *Name, DeviceImageTy &Image) { 310330d8983SJohannes Doerfert SmallString<128> GlobalsFilename = {Name, ".globals"}; 311330d8983SJohannes Doerfert dumpGlobals(GlobalsFilename, Image); 312330d8983SJohannes Doerfert 313330d8983SJohannes Doerfert SmallString<128> MemoryFilename = {Name, ".memory"}; 314330d8983SJohannes Doerfert dumpDeviceMemory(MemoryFilename); 315330d8983SJohannes Doerfert } 316330d8983SJohannes Doerfert 317330d8983SJohannes Doerfert void saveKernelOutputInfo(const char *Name) { 318330d8983SJohannes Doerfert SmallString<128> OutputFilename = { 319330d8983SJohannes Doerfert Name, (isRecording() ? ".original.output" : ".replay.output")}; 320330d8983SJohannes Doerfert dumpDeviceMemory(OutputFilename); 321330d8983SJohannes Doerfert } 322330d8983SJohannes Doerfert 323330d8983SJohannes Doerfert void *alloc(uint64_t Size) { 324330d8983SJohannes Doerfert assert(MemoryStart && "Expected memory has been pre-allocated"); 325330d8983SJohannes Doerfert void *Alloc = nullptr; 326330d8983SJohannes Doerfert constexpr int Alignment = 16; 327330d8983SJohannes Doerfert // Assumes alignment is a power of 2. 328330d8983SJohannes Doerfert int64_t AlignedSize = (Size + (Alignment - 1)) & (~(Alignment - 1)); 329330d8983SJohannes Doerfert std::lock_guard<std::mutex> LG(AllocationLock); 330330d8983SJohannes Doerfert Alloc = MemoryPtr; 331330d8983SJohannes Doerfert MemoryPtr = (char *)MemoryPtr + AlignedSize; 332330d8983SJohannes Doerfert MemorySize += AlignedSize; 333330d8983SJohannes Doerfert DP("Memory Allocator return " DPxMOD "\n", DPxPTR(Alloc)); 334330d8983SJohannes Doerfert return Alloc; 335330d8983SJohannes Doerfert } 336330d8983SJohannes Doerfert 337330d8983SJohannes Doerfert Error init(GenericDeviceTy *Device, uint64_t MemSize, void *VAddr, 338330d8983SJohannes Doerfert RRStatusTy Status, bool SaveOutput, uint64_t &ReqPtrArgOffset) { 339330d8983SJohannes Doerfert this->Device = Device; 340330d8983SJohannes Doerfert this->Status = Status; 341330d8983SJohannes Doerfert this->ReplaySaveOutput = SaveOutput; 342330d8983SJohannes Doerfert 343330d8983SJohannes Doerfert if (auto Err = preallocateDeviceMemory(MemSize, VAddr)) 344330d8983SJohannes Doerfert return Err; 345330d8983SJohannes Doerfert 346330d8983SJohannes Doerfert INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(), 347330d8983SJohannes Doerfert "Record Replay Initialized (%p)" 348330d8983SJohannes Doerfert " as starting address, %lu Memory Size" 349330d8983SJohannes Doerfert " and set on status %s\n", 350330d8983SJohannes Doerfert MemoryStart, TotalSize, 351330d8983SJohannes Doerfert Status == RRStatusTy::RRRecording ? "Recording" : "Replaying"); 352330d8983SJohannes Doerfert 353330d8983SJohannes Doerfert // Tell the user to offset pointer arguments as the memory allocation does 354330d8983SJohannes Doerfert // not match. 355330d8983SJohannes Doerfert ReqPtrArgOffset = MemoryOffset; 356330d8983SJohannes Doerfert return Plugin::success(); 357330d8983SJohannes Doerfert } 358330d8983SJohannes Doerfert 359330d8983SJohannes Doerfert void deinit() { 360330d8983SJohannes Doerfert if (UsedVAMap) { 361330d8983SJohannes Doerfert if (auto Err = Device->memoryVAUnMap(MemoryStart, TotalSize)) 362330d8983SJohannes Doerfert report_fatal_error("Error on releasing virtual memory space"); 363330d8983SJohannes Doerfert } else { 364330d8983SJohannes Doerfert Device->free(MemoryStart); 365330d8983SJohannes Doerfert } 366330d8983SJohannes Doerfert } 367330d8983SJohannes Doerfert }; 368f42f57b5SJoseph Huber } // namespace llvm::omp::target::plugin 369330d8983SJohannes Doerfert 370330d8983SJohannes Doerfert // Extract the mapping of host function pointers to device function pointers 371330d8983SJohannes Doerfert // from the entry table. Functions marked as 'indirect' in OpenMP will have 372330d8983SJohannes Doerfert // offloading entries generated for them which map the host's function pointer 373330d8983SJohannes Doerfert // to a global containing the corresponding function pointer on the device. 374330d8983SJohannes Doerfert static Expected<std::pair<void *, uint64_t>> 375330d8983SJohannes Doerfert setupIndirectCallTable(GenericPluginTy &Plugin, GenericDeviceTy &Device, 376330d8983SJohannes Doerfert DeviceImageTy &Image) { 377330d8983SJohannes Doerfert GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); 378330d8983SJohannes Doerfert 3796518b121SJoseph Huber llvm::ArrayRef<llvm::offloading::EntryTy> Entries( 3806518b121SJoseph Huber Image.getTgtImage()->EntriesBegin, Image.getTgtImage()->EntriesEnd); 381330d8983SJohannes Doerfert llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable; 382330d8983SJohannes Doerfert for (const auto &Entry : Entries) { 3836518b121SJoseph Huber if (Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT)) 384330d8983SJohannes Doerfert continue; 385330d8983SJohannes Doerfert 3866518b121SJoseph Huber assert(Entry.Size == sizeof(void *) && "Global not a function pointer?"); 387330d8983SJohannes Doerfert auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); 388330d8983SJohannes Doerfert 3896518b121SJoseph Huber GlobalTy DeviceGlobal(Entry.SymbolName, Entry.Size); 390330d8983SJohannes Doerfert if (auto Err = 391330d8983SJohannes Doerfert Handler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) 392330d8983SJohannes Doerfert return std::move(Err); 393330d8983SJohannes Doerfert 3946518b121SJoseph Huber HstPtr = Entry.Address; 395330d8983SJohannes Doerfert if (auto Err = Device.dataRetrieve(&DevPtr, DeviceGlobal.getPtr(), 3966518b121SJoseph Huber Entry.Size, nullptr)) 397330d8983SJohannes Doerfert return std::move(Err); 398330d8983SJohannes Doerfert } 399330d8983SJohannes Doerfert 400330d8983SJohannes Doerfert // If we do not have any indirect globals we exit early. 401330d8983SJohannes Doerfert if (IndirectCallTable.empty()) 402330d8983SJohannes Doerfert return std::pair{nullptr, 0}; 403330d8983SJohannes Doerfert 404330d8983SJohannes Doerfert // Sort the array to allow for more efficient lookup of device pointers. 405330d8983SJohannes Doerfert llvm::sort(IndirectCallTable, 406330d8983SJohannes Doerfert [](const auto &x, const auto &y) { return x.first < y.first; }); 407330d8983SJohannes Doerfert 408330d8983SJohannes Doerfert uint64_t TableSize = 409330d8983SJohannes Doerfert IndirectCallTable.size() * sizeof(std::pair<void *, void *>); 410330d8983SJohannes Doerfert void *DevicePtr = Device.allocate(TableSize, nullptr, TARGET_ALLOC_DEVICE); 411330d8983SJohannes Doerfert if (auto Err = Device.dataSubmit(DevicePtr, IndirectCallTable.data(), 412330d8983SJohannes Doerfert TableSize, nullptr)) 413330d8983SJohannes Doerfert return std::move(Err); 414330d8983SJohannes Doerfert return std::pair<void *, uint64_t>(DevicePtr, IndirectCallTable.size()); 415330d8983SJohannes Doerfert } 416330d8983SJohannes Doerfert 417330d8983SJohannes Doerfert AsyncInfoWrapperTy::AsyncInfoWrapperTy(GenericDeviceTy &Device, 418330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) 419330d8983SJohannes Doerfert : Device(Device), 420330d8983SJohannes Doerfert AsyncInfoPtr(AsyncInfoPtr ? AsyncInfoPtr : &LocalAsyncInfo) {} 421330d8983SJohannes Doerfert 422330d8983SJohannes Doerfert void AsyncInfoWrapperTy::finalize(Error &Err) { 423330d8983SJohannes Doerfert assert(AsyncInfoPtr && "AsyncInfoWrapperTy already finalized"); 424330d8983SJohannes Doerfert 425330d8983SJohannes Doerfert // If we used a local async info object we want synchronous behavior. In that 426330d8983SJohannes Doerfert // case, and assuming the current status code is correct, we will synchronize 427330d8983SJohannes Doerfert // explicitly when the object is deleted. Update the error with the result of 428330d8983SJohannes Doerfert // the synchronize operation. 429330d8983SJohannes Doerfert if (AsyncInfoPtr == &LocalAsyncInfo && LocalAsyncInfo.Queue && !Err) 430330d8983SJohannes Doerfert Err = Device.synchronize(&LocalAsyncInfo); 431330d8983SJohannes Doerfert 432330d8983SJohannes Doerfert // Invalidate the wrapper object. 433330d8983SJohannes Doerfert AsyncInfoPtr = nullptr; 434330d8983SJohannes Doerfert } 435330d8983SJohannes Doerfert 436330d8983SJohannes Doerfert Error GenericKernelTy::init(GenericDeviceTy &GenericDevice, 437330d8983SJohannes Doerfert DeviceImageTy &Image) { 438330d8983SJohannes Doerfert 439330d8983SJohannes Doerfert ImagePtr = &Image; 440330d8983SJohannes Doerfert 441330d8983SJohannes Doerfert // Retrieve kernel environment object for the kernel. 442330d8983SJohannes Doerfert GlobalTy KernelEnv(std::string(Name) + "_kernel_environment", 443330d8983SJohannes Doerfert sizeof(KernelEnvironment), &KernelEnvironment); 444330d8983SJohannes Doerfert GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler(); 445330d8983SJohannes Doerfert if (auto Err = 446330d8983SJohannes Doerfert GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) { 447330d8983SJohannes Doerfert [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); 448330d8983SJohannes Doerfert DP("Failed to read kernel environment for '%s': %s\n" 449330d8983SJohannes Doerfert "Using default SPMD (2) execution mode\n", 450330d8983SJohannes Doerfert Name, ErrStr.data()); 451330d8983SJohannes Doerfert assert(KernelEnvironment.Configuration.ReductionDataSize == 0 && 452330d8983SJohannes Doerfert "Default initialization failed."); 453330d8983SJohannes Doerfert IsBareKernel = true; 454330d8983SJohannes Doerfert } 455330d8983SJohannes Doerfert 456330d8983SJohannes Doerfert // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max; 457330d8983SJohannes Doerfert MaxNumThreads = KernelEnvironment.Configuration.MaxThreads > 0 458330d8983SJohannes Doerfert ? std::min(KernelEnvironment.Configuration.MaxThreads, 459330d8983SJohannes Doerfert int32_t(GenericDevice.getThreadLimit())) 460330d8983SJohannes Doerfert : GenericDevice.getThreadLimit(); 461330d8983SJohannes Doerfert 462330d8983SJohannes Doerfert // Pref = Config.Pref > 0 ? max(Config.Pref, Device.Pref) : Device.Pref; 463330d8983SJohannes Doerfert PreferredNumThreads = 464330d8983SJohannes Doerfert KernelEnvironment.Configuration.MinThreads > 0 465330d8983SJohannes Doerfert ? std::max(KernelEnvironment.Configuration.MinThreads, 466330d8983SJohannes Doerfert int32_t(GenericDevice.getDefaultNumThreads())) 467330d8983SJohannes Doerfert : GenericDevice.getDefaultNumThreads(); 468330d8983SJohannes Doerfert 469330d8983SJohannes Doerfert return initImpl(GenericDevice, Image); 470330d8983SJohannes Doerfert } 471330d8983SJohannes Doerfert 472330d8983SJohannes Doerfert Expected<KernelLaunchEnvironmentTy *> 473330d8983SJohannes Doerfert GenericKernelTy::getKernelLaunchEnvironment( 474330d8983SJohannes Doerfert GenericDeviceTy &GenericDevice, uint32_t Version, 475330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) const { 476330d8983SJohannes Doerfert // Ctor/Dtor have no arguments, replaying uses the original kernel launch 477330d8983SJohannes Doerfert // environment. Older versions of the compiler do not generate a kernel 478330d8983SJohannes Doerfert // launch environment. 479f42f57b5SJoseph Huber if (GenericDevice.Plugin.getRecordReplay().isReplaying() || 480330d8983SJohannes Doerfert Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR) 481330d8983SJohannes Doerfert return nullptr; 482330d8983SJohannes Doerfert 483330d8983SJohannes Doerfert if (!KernelEnvironment.Configuration.ReductionDataSize || 484330d8983SJohannes Doerfert !KernelEnvironment.Configuration.ReductionBufferLength) 485330d8983SJohannes Doerfert return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0); 486330d8983SJohannes Doerfert 487330d8983SJohannes Doerfert // TODO: Check if the kernel needs a launch environment. 488330d8983SJohannes Doerfert auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy), 489330d8983SJohannes Doerfert /*HostPtr=*/nullptr, 490330d8983SJohannes Doerfert TargetAllocTy::TARGET_ALLOC_DEVICE); 491330d8983SJohannes Doerfert if (!AllocOrErr) 492330d8983SJohannes Doerfert return AllocOrErr.takeError(); 493330d8983SJohannes Doerfert 494330d8983SJohannes Doerfert // Remember to free the memory later. 495330d8983SJohannes Doerfert AsyncInfoWrapper.freeAllocationAfterSynchronization(*AllocOrErr); 496330d8983SJohannes Doerfert 497330d8983SJohannes Doerfert /// Use the KLE in the __tgt_async_info to ensure a stable address for the 498330d8983SJohannes Doerfert /// async data transfer. 499330d8983SJohannes Doerfert auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment; 500330d8983SJohannes Doerfert LocalKLE = KernelLaunchEnvironment; 501330d8983SJohannes Doerfert { 502330d8983SJohannes Doerfert auto AllocOrErr = GenericDevice.dataAlloc( 503330d8983SJohannes Doerfert KernelEnvironment.Configuration.ReductionDataSize * 504330d8983SJohannes Doerfert KernelEnvironment.Configuration.ReductionBufferLength, 505330d8983SJohannes Doerfert /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE); 506330d8983SJohannes Doerfert if (!AllocOrErr) 507330d8983SJohannes Doerfert return AllocOrErr.takeError(); 508330d8983SJohannes Doerfert LocalKLE.ReductionBuffer = *AllocOrErr; 509330d8983SJohannes Doerfert // Remember to free the memory later. 510330d8983SJohannes Doerfert AsyncInfoWrapper.freeAllocationAfterSynchronization(*AllocOrErr); 511330d8983SJohannes Doerfert } 512330d8983SJohannes Doerfert 513330d8983SJohannes Doerfert INFO(OMP_INFOTYPE_DATA_TRANSFER, GenericDevice.getDeviceId(), 514330d8983SJohannes Doerfert "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD 515330d8983SJohannes Doerfert ", Size=%" PRId64 ", Name=KernelLaunchEnv\n", 516330d8983SJohannes Doerfert DPxPTR(&LocalKLE), DPxPTR(*AllocOrErr), 517330d8983SJohannes Doerfert sizeof(KernelLaunchEnvironmentTy)); 518330d8983SJohannes Doerfert 519330d8983SJohannes Doerfert auto Err = GenericDevice.dataSubmit(*AllocOrErr, &LocalKLE, 520330d8983SJohannes Doerfert sizeof(KernelLaunchEnvironmentTy), 521330d8983SJohannes Doerfert AsyncInfoWrapper); 522330d8983SJohannes Doerfert if (Err) 523330d8983SJohannes Doerfert return Err; 524330d8983SJohannes Doerfert return static_cast<KernelLaunchEnvironmentTy *>(*AllocOrErr); 525330d8983SJohannes Doerfert } 526330d8983SJohannes Doerfert 527330d8983SJohannes Doerfert Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice, 528330d8983SJohannes Doerfert KernelArgsTy &KernelArgs, 52992376c3fSShilei Tian uint32_t NumThreads[3], 53092376c3fSShilei Tian uint32_t NumBlocks[3]) const { 531330d8983SJohannes Doerfert INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(), 53292376c3fSShilei Tian "Launching kernel %s with [%u,%u,%u] blocks and [%u,%u,%u] threads in " 53392376c3fSShilei Tian "%s mode\n", 53492376c3fSShilei Tian getName(), NumBlocks[0], NumBlocks[1], NumBlocks[2], NumThreads[0], 53592376c3fSShilei Tian NumThreads[1], NumThreads[2], getExecutionModeName()); 536330d8983SJohannes Doerfert return printLaunchInfoDetails(GenericDevice, KernelArgs, NumThreads, 537330d8983SJohannes Doerfert NumBlocks); 538330d8983SJohannes Doerfert } 539330d8983SJohannes Doerfert 540330d8983SJohannes Doerfert Error GenericKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice, 541330d8983SJohannes Doerfert KernelArgsTy &KernelArgs, 54292376c3fSShilei Tian uint32_t NumThreads[3], 54392376c3fSShilei Tian uint32_t NumBlocks[3]) const { 544330d8983SJohannes Doerfert return Plugin::success(); 545330d8983SJohannes Doerfert } 546330d8983SJohannes Doerfert 547330d8983SJohannes Doerfert Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs, 548330d8983SJohannes Doerfert ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs, 549330d8983SJohannes Doerfert AsyncInfoWrapperTy &AsyncInfoWrapper) const { 550330d8983SJohannes Doerfert llvm::SmallVector<void *, 16> Args; 551330d8983SJohannes Doerfert llvm::SmallVector<void *, 16> Ptrs; 552330d8983SJohannes Doerfert 553330d8983SJohannes Doerfert auto KernelLaunchEnvOrErr = getKernelLaunchEnvironment( 554330d8983SJohannes Doerfert GenericDevice, KernelArgs.Version, AsyncInfoWrapper); 555330d8983SJohannes Doerfert if (!KernelLaunchEnvOrErr) 556330d8983SJohannes Doerfert return KernelLaunchEnvOrErr.takeError(); 557330d8983SJohannes Doerfert 55880525dfcSJohannes Doerfert KernelLaunchParamsTy LaunchParams; 55980525dfcSJohannes Doerfert 56080525dfcSJohannes Doerfert // Kernel languages don't use indirection. 56180525dfcSJohannes Doerfert if (KernelArgs.Flags.IsCUDA) { 56280525dfcSJohannes Doerfert LaunchParams = 56380525dfcSJohannes Doerfert *reinterpret_cast<KernelLaunchParamsTy *>(KernelArgs.ArgPtrs); 56480525dfcSJohannes Doerfert } else { 56580525dfcSJohannes Doerfert LaunchParams = 56680525dfcSJohannes Doerfert prepareArgs(GenericDevice, ArgPtrs, ArgOffsets, KernelArgs.NumArgs, 56780525dfcSJohannes Doerfert Args, Ptrs, *KernelLaunchEnvOrErr); 56880525dfcSJohannes Doerfert } 569330d8983SJohannes Doerfert 57092376c3fSShilei Tian uint32_t NumThreads[3] = {KernelArgs.ThreadLimit[0], 57192376c3fSShilei Tian KernelArgs.ThreadLimit[1], 57292376c3fSShilei Tian KernelArgs.ThreadLimit[2]}; 57392376c3fSShilei Tian uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1], 57492376c3fSShilei Tian KernelArgs.NumTeams[2]}; 57592376c3fSShilei Tian if (!IsBareKernel) { 57692376c3fSShilei Tian NumThreads[0] = getNumThreads(GenericDevice, NumThreads); 57792376c3fSShilei Tian NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount, 57892376c3fSShilei Tian NumThreads[0], KernelArgs.ThreadLimit[0] > 0); 57992376c3fSShilei Tian } 580330d8983SJohannes Doerfert 581330d8983SJohannes Doerfert // Record the kernel description after we modified the argument count and num 582330d8983SJohannes Doerfert // blocks/threads. 583f42f57b5SJoseph Huber RecordReplayTy &RecordReplay = GenericDevice.Plugin.getRecordReplay(); 584330d8983SJohannes Doerfert if (RecordReplay.isRecording()) { 585330d8983SJohannes Doerfert RecordReplay.saveImage(getName(), getImage()); 586330d8983SJohannes Doerfert RecordReplay.saveKernelInput(getName(), getImage()); 58754b5c76dSJohannes Doerfert RecordReplay.saveKernelDescr(getName(), LaunchParams, KernelArgs.NumArgs, 58892376c3fSShilei Tian NumBlocks[0], NumThreads[0], 58992376c3fSShilei Tian KernelArgs.Tripcount); 590330d8983SJohannes Doerfert } 591330d8983SJohannes Doerfert 592330d8983SJohannes Doerfert if (auto Err = 593330d8983SJohannes Doerfert printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks)) 594330d8983SJohannes Doerfert return Err; 595330d8983SJohannes Doerfert 596330d8983SJohannes Doerfert return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs, 59754b5c76dSJohannes Doerfert LaunchParams, AsyncInfoWrapper); 598330d8983SJohannes Doerfert } 599330d8983SJohannes Doerfert 60054b5c76dSJohannes Doerfert KernelLaunchParamsTy GenericKernelTy::prepareArgs( 601330d8983SJohannes Doerfert GenericDeviceTy &GenericDevice, void **ArgPtrs, ptrdiff_t *ArgOffsets, 602330d8983SJohannes Doerfert uint32_t &NumArgs, llvm::SmallVectorImpl<void *> &Args, 603330d8983SJohannes Doerfert llvm::SmallVectorImpl<void *> &Ptrs, 604330d8983SJohannes Doerfert KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const { 605330d8983SJohannes Doerfert uint32_t KLEOffset = !!KernelLaunchEnvironment; 606330d8983SJohannes Doerfert NumArgs += KLEOffset; 607330d8983SJohannes Doerfert 608330d8983SJohannes Doerfert if (NumArgs == 0) 60954b5c76dSJohannes Doerfert return KernelLaunchParamsTy{}; 610330d8983SJohannes Doerfert 611330d8983SJohannes Doerfert Args.resize(NumArgs); 612330d8983SJohannes Doerfert Ptrs.resize(NumArgs); 613330d8983SJohannes Doerfert 614330d8983SJohannes Doerfert if (KernelLaunchEnvironment) { 61554b5c76dSJohannes Doerfert Args[0] = KernelLaunchEnvironment; 61654b5c76dSJohannes Doerfert Ptrs[0] = &Args[0]; 617330d8983SJohannes Doerfert } 618330d8983SJohannes Doerfert 61981d20d86SJoseph Huber for (uint32_t I = KLEOffset; I < NumArgs; ++I) { 62054b5c76dSJohannes Doerfert Args[I] = 621330d8983SJohannes Doerfert (void *)((intptr_t)ArgPtrs[I - KLEOffset] + ArgOffsets[I - KLEOffset]); 62254b5c76dSJohannes Doerfert Ptrs[I] = &Args[I]; 623330d8983SJohannes Doerfert } 62454b5c76dSJohannes Doerfert return KernelLaunchParamsTy{sizeof(void *) * NumArgs, &Args[0], &Ptrs[0]}; 625330d8983SJohannes Doerfert } 626330d8983SJohannes Doerfert 627330d8983SJohannes Doerfert uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice, 628330d8983SJohannes Doerfert uint32_t ThreadLimitClause[3]) const { 62992376c3fSShilei Tian assert(!IsBareKernel && "bare kernel should not call this function"); 630330d8983SJohannes Doerfert 63192376c3fSShilei Tian assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 && 63292376c3fSShilei Tian "Multi dimensional launch not supported yet."); 633330d8983SJohannes Doerfert 634330d8983SJohannes Doerfert if (ThreadLimitClause[0] > 0 && isGenericMode()) 635330d8983SJohannes Doerfert ThreadLimitClause[0] += GenericDevice.getWarpSize(); 636330d8983SJohannes Doerfert 637330d8983SJohannes Doerfert return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0) 638330d8983SJohannes Doerfert ? ThreadLimitClause[0] 639330d8983SJohannes Doerfert : PreferredNumThreads); 640330d8983SJohannes Doerfert } 641330d8983SJohannes Doerfert 64292376c3fSShilei Tian uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice, 643330d8983SJohannes Doerfert uint32_t NumTeamsClause[3], 644330d8983SJohannes Doerfert uint64_t LoopTripCount, 645330d8983SJohannes Doerfert uint32_t &NumThreads, 646330d8983SJohannes Doerfert bool IsNumThreadsFromUser) const { 64792376c3fSShilei Tian assert(!IsBareKernel && "bare kernel should not call this function"); 648330d8983SJohannes Doerfert 64992376c3fSShilei Tian assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 && 65092376c3fSShilei Tian "Multi dimensional launch not supported yet."); 651330d8983SJohannes Doerfert 652330d8983SJohannes Doerfert if (NumTeamsClause[0] > 0) { 653330d8983SJohannes Doerfert // TODO: We need to honor any value and consequently allow more than the 654330d8983SJohannes Doerfert // block limit. For this we might need to start multiple kernels or let the 655330d8983SJohannes Doerfert // blocks start again until the requested number has been started. 656330d8983SJohannes Doerfert return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit()); 657330d8983SJohannes Doerfert } 658330d8983SJohannes Doerfert 659330d8983SJohannes Doerfert uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks(); 660330d8983SJohannes Doerfert uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max(); 661330d8983SJohannes Doerfert if (LoopTripCount > 0) { 662330d8983SJohannes Doerfert if (isSPMDMode()) { 663330d8983SJohannes Doerfert // We have a combined construct, i.e. `target teams distribute 664330d8983SJohannes Doerfert // parallel for [simd]`. We launch so many teams so that each thread 665330d8983SJohannes Doerfert // will execute one iteration of the loop; rounded up to the nearest 666330d8983SJohannes Doerfert // integer. However, if that results in too few teams, we artificially 667330d8983SJohannes Doerfert // reduce the thread count per team to increase the outer parallelism. 668330d8983SJohannes Doerfert auto MinThreads = GenericDevice.getMinThreadsForLowTripCountLoop(); 669330d8983SJohannes Doerfert MinThreads = std::min(MinThreads, NumThreads); 670330d8983SJohannes Doerfert 671330d8983SJohannes Doerfert // Honor the thread_limit clause; only lower the number of threads. 672330d8983SJohannes Doerfert [[maybe_unused]] auto OldNumThreads = NumThreads; 673330d8983SJohannes Doerfert if (LoopTripCount >= DefaultNumBlocks * NumThreads || 674330d8983SJohannes Doerfert IsNumThreadsFromUser) { 675330d8983SJohannes Doerfert // Enough parallelism for teams and threads. 676330d8983SJohannes Doerfert TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1; 677330d8983SJohannes Doerfert assert(IsNumThreadsFromUser || 678330d8983SJohannes Doerfert TripCountNumBlocks >= DefaultNumBlocks && 679330d8983SJohannes Doerfert "Expected sufficient outer parallelism."); 680330d8983SJohannes Doerfert } else if (LoopTripCount >= DefaultNumBlocks * MinThreads) { 681330d8983SJohannes Doerfert // Enough parallelism for teams, limit threads. 682330d8983SJohannes Doerfert 683330d8983SJohannes Doerfert // This case is hard; for now, we force "full warps": 684330d8983SJohannes Doerfert // First, compute a thread count assuming DefaultNumBlocks. 685330d8983SJohannes Doerfert auto NumThreadsDefaultBlocks = 686330d8983SJohannes Doerfert (LoopTripCount + DefaultNumBlocks - 1) / DefaultNumBlocks; 687330d8983SJohannes Doerfert // Now get a power of two that is larger or equal. 688330d8983SJohannes Doerfert auto NumThreadsDefaultBlocksP2 = 689330d8983SJohannes Doerfert llvm::PowerOf2Ceil(NumThreadsDefaultBlocks); 690330d8983SJohannes Doerfert // Do not increase a thread limit given be the user. 691330d8983SJohannes Doerfert NumThreads = std::min(NumThreads, uint32_t(NumThreadsDefaultBlocksP2)); 692330d8983SJohannes Doerfert assert(NumThreads >= MinThreads && 693330d8983SJohannes Doerfert "Expected sufficient inner parallelism."); 694330d8983SJohannes Doerfert TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1; 695330d8983SJohannes Doerfert } else { 696330d8983SJohannes Doerfert // Not enough parallelism for teams and threads, limit both. 697330d8983SJohannes Doerfert NumThreads = std::min(NumThreads, MinThreads); 698330d8983SJohannes Doerfert TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1; 699330d8983SJohannes Doerfert } 700330d8983SJohannes Doerfert 701330d8983SJohannes Doerfert assert(NumThreads * TripCountNumBlocks >= LoopTripCount && 702330d8983SJohannes Doerfert "Expected sufficient parallelism"); 703330d8983SJohannes Doerfert assert(OldNumThreads >= NumThreads && 704330d8983SJohannes Doerfert "Number of threads cannot be increased!"); 705330d8983SJohannes Doerfert } else { 706330d8983SJohannes Doerfert assert((isGenericMode() || isGenericSPMDMode()) && 707330d8983SJohannes Doerfert "Unexpected execution mode!"); 708330d8983SJohannes Doerfert // If we reach this point, then we have a non-combined construct, i.e. 709330d8983SJohannes Doerfert // `teams distribute` with a nested `parallel for` and each team is 710330d8983SJohannes Doerfert // assigned one iteration of the `distribute` loop. E.g.: 711330d8983SJohannes Doerfert // 712330d8983SJohannes Doerfert // #pragma omp target teams distribute 713330d8983SJohannes Doerfert // for(...loop_tripcount...) { 714330d8983SJohannes Doerfert // #pragma omp parallel for 715330d8983SJohannes Doerfert // for(...) {} 716330d8983SJohannes Doerfert // } 717330d8983SJohannes Doerfert // 718330d8983SJohannes Doerfert // Threads within a team will execute the iterations of the `parallel` 719330d8983SJohannes Doerfert // loop. 720330d8983SJohannes Doerfert TripCountNumBlocks = LoopTripCount; 721330d8983SJohannes Doerfert } 722330d8983SJohannes Doerfert } 723597d2f76STim Gymnich 724597d2f76STim Gymnich uint32_t PreferredNumBlocks = TripCountNumBlocks; 725330d8983SJohannes Doerfert // If the loops are long running we rather reuse blocks than spawn too many. 726597d2f76STim Gymnich if (GenericDevice.getReuseBlocksForHighTripCount()) 727597d2f76STim Gymnich PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks); 728330d8983SJohannes Doerfert return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit()); 729330d8983SJohannes Doerfert } 730330d8983SJohannes Doerfert 731330d8983SJohannes Doerfert GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, 732330d8983SJohannes Doerfert int32_t NumDevices, 733330d8983SJohannes Doerfert const llvm::omp::GV &OMPGridValues) 734330d8983SJohannes Doerfert : Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"), 735330d8983SJohannes Doerfert OMP_NumTeams("OMP_NUM_TEAMS"), 736330d8983SJohannes Doerfert OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"), 737330d8983SJohannes Doerfert OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"), 738330d8983SJohannes Doerfert OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"), 739330d8983SJohannes Doerfert // Do not initialize the following two envars since they depend on the 740330d8983SJohannes Doerfert // device initialization. These cannot be consulted until the device is 741330d8983SJohannes Doerfert // initialized correctly. We intialize them in GenericDeviceTy::init(). 742330d8983SJohannes Doerfert OMPX_TargetStackSize(), OMPX_TargetHeapSize(), 743330d8983SJohannes Doerfert // By default, the initial number of streams and events is 1. 744330d8983SJohannes Doerfert OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 1), 745330d8983SJohannes Doerfert OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 1), 746330d8983SJohannes Doerfert DeviceId(DeviceId), GridValues(OMPGridValues), 747330d8983SJohannes Doerfert PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(), 748330d8983SJohannes Doerfert PinnedAllocs(*this), RPCServer(nullptr) { 749330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT 750330d8983SJohannes Doerfert OmptInitialized.store(false); 751330d8983SJohannes Doerfert // Bind the callbacks to this device's member functions 752330d8983SJohannes Doerfert #define bindOmptCallback(Name, Type, Code) \ 753330d8983SJohannes Doerfert if (ompt::Initialized && ompt::lookupCallbackByCode) { \ 754330d8983SJohannes Doerfert ompt::lookupCallbackByCode((ompt_callbacks_t)(Code), \ 755330d8983SJohannes Doerfert ((ompt_callback_t *)&(Name##_fn))); \ 756330d8983SJohannes Doerfert DP("OMPT: class bound %s=%p\n", #Name, ((void *)(uint64_t)Name##_fn)); \ 757330d8983SJohannes Doerfert } 758330d8983SJohannes Doerfert 759330d8983SJohannes Doerfert FOREACH_OMPT_DEVICE_EVENT(bindOmptCallback); 760330d8983SJohannes Doerfert #undef bindOmptCallback 761330d8983SJohannes Doerfert 762330d8983SJohannes Doerfert #endif 763330d8983SJohannes Doerfert } 764330d8983SJohannes Doerfert 765330d8983SJohannes Doerfert Error GenericDeviceTy::init(GenericPluginTy &Plugin) { 766330d8983SJohannes Doerfert if (auto Err = initImpl(Plugin)) 767330d8983SJohannes Doerfert return Err; 768330d8983SJohannes Doerfert 769330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT 770330d8983SJohannes Doerfert if (ompt::Initialized) { 771330d8983SJohannes Doerfert bool ExpectedStatus = false; 772330d8983SJohannes Doerfert if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true)) 773435aa766SJoseph Huber performOmptCallback(device_initialize, Plugin.getUserId(DeviceId), 774330d8983SJohannes Doerfert /*type=*/getComputeUnitKind().c_str(), 775330d8983SJohannes Doerfert /*device=*/reinterpret_cast<ompt_device_t *>(this), 776330d8983SJohannes Doerfert /*lookup=*/ompt::lookupCallbackByName, 777330d8983SJohannes Doerfert /*documentation=*/nullptr); 778330d8983SJohannes Doerfert } 779330d8983SJohannes Doerfert #endif 780330d8983SJohannes Doerfert 781330d8983SJohannes Doerfert // Read and reinitialize the envars that depend on the device initialization. 782330d8983SJohannes Doerfert // Notice these two envars may change the stack size and heap size of the 783330d8983SJohannes Doerfert // device, so they need the device properly initialized. 784330d8983SJohannes Doerfert auto StackSizeEnvarOrErr = UInt64Envar::create( 785330d8983SJohannes Doerfert "LIBOMPTARGET_STACK_SIZE", 786330d8983SJohannes Doerfert [this](uint64_t &V) -> Error { return getDeviceStackSize(V); }, 787330d8983SJohannes Doerfert [this](uint64_t V) -> Error { return setDeviceStackSize(V); }); 788330d8983SJohannes Doerfert if (!StackSizeEnvarOrErr) 789330d8983SJohannes Doerfert return StackSizeEnvarOrErr.takeError(); 790330d8983SJohannes Doerfert OMPX_TargetStackSize = std::move(*StackSizeEnvarOrErr); 791330d8983SJohannes Doerfert 792330d8983SJohannes Doerfert auto HeapSizeEnvarOrErr = UInt64Envar::create( 793330d8983SJohannes Doerfert "LIBOMPTARGET_HEAP_SIZE", 794330d8983SJohannes Doerfert [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); }, 795330d8983SJohannes Doerfert [this](uint64_t V) -> Error { return setDeviceHeapSize(V); }); 796330d8983SJohannes Doerfert if (!HeapSizeEnvarOrErr) 797330d8983SJohannes Doerfert return HeapSizeEnvarOrErr.takeError(); 798330d8983SJohannes Doerfert OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr); 799330d8983SJohannes Doerfert 800330d8983SJohannes Doerfert // Update the maximum number of teams and threads after the device 801330d8983SJohannes Doerfert // initialization sets the corresponding hardware limit. 802330d8983SJohannes Doerfert if (OMP_NumTeams > 0) 803330d8983SJohannes Doerfert GridValues.GV_Max_Teams = 804330d8983SJohannes Doerfert std::min(GridValues.GV_Max_Teams, uint32_t(OMP_NumTeams)); 805330d8983SJohannes Doerfert 806330d8983SJohannes Doerfert if (OMP_TeamsThreadLimit > 0) 807330d8983SJohannes Doerfert GridValues.GV_Max_WG_Size = 808330d8983SJohannes Doerfert std::min(GridValues.GV_Max_WG_Size, uint32_t(OMP_TeamsThreadLimit)); 809330d8983SJohannes Doerfert 810330d8983SJohannes Doerfert // Enable the memory manager if required. 811330d8983SJohannes Doerfert auto [ThresholdMM, EnableMM] = MemoryManagerTy::getSizeThresholdFromEnv(); 812330d8983SJohannes Doerfert if (EnableMM) 813330d8983SJohannes Doerfert MemoryManager = new MemoryManagerTy(*this, ThresholdMM); 814330d8983SJohannes Doerfert 815330d8983SJohannes Doerfert return Plugin::success(); 816330d8983SJohannes Doerfert } 817330d8983SJohannes Doerfert 818330d8983SJohannes Doerfert Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { 819330d8983SJohannes Doerfert for (DeviceImageTy *Image : LoadedImages) 820330d8983SJohannes Doerfert if (auto Err = callGlobalDestructors(Plugin, *Image)) 821330d8983SJohannes Doerfert return Err; 822330d8983SJohannes Doerfert 823330d8983SJohannes Doerfert if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) { 824330d8983SJohannes Doerfert GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); 825330d8983SJohannes Doerfert for (auto *Image : LoadedImages) { 826330d8983SJohannes Doerfert DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0}; 827330d8983SJohannes Doerfert GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker", 828330d8983SJohannes Doerfert sizeof(DeviceMemoryPoolTrackingTy), 829330d8983SJohannes Doerfert &ImageDeviceMemoryPoolTracking); 830330d8983SJohannes Doerfert if (auto Err = 831330d8983SJohannes Doerfert GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) { 832330d8983SJohannes Doerfert consumeError(std::move(Err)); 833330d8983SJohannes Doerfert continue; 834330d8983SJohannes Doerfert } 835330d8983SJohannes Doerfert DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking); 836330d8983SJohannes Doerfert } 837330d8983SJohannes Doerfert 838330d8983SJohannes Doerfert // TODO: Write this by default into a file. 839330d8983SJohannes Doerfert printf("\n\n|-----------------------\n" 840330d8983SJohannes Doerfert "| Device memory tracker:\n" 841330d8983SJohannes Doerfert "|-----------------------\n" 842330d8983SJohannes Doerfert "| #Allocations: %lu\n" 843330d8983SJohannes Doerfert "| Byes allocated: %lu\n" 844330d8983SJohannes Doerfert "| Minimal allocation: %lu\n" 845330d8983SJohannes Doerfert "| Maximal allocation: %lu\n" 846330d8983SJohannes Doerfert "|-----------------------\n\n\n", 847330d8983SJohannes Doerfert DeviceMemoryPoolTracking.NumAllocations, 848330d8983SJohannes Doerfert DeviceMemoryPoolTracking.AllocationTotal, 849330d8983SJohannes Doerfert DeviceMemoryPoolTracking.AllocationMin, 850330d8983SJohannes Doerfert DeviceMemoryPoolTracking.AllocationMax); 851330d8983SJohannes Doerfert } 852330d8983SJohannes Doerfert 853fde2d23eSEthan Luis McDonough for (auto *Image : LoadedImages) { 854fde2d23eSEthan Luis McDonough GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); 855fde2d23eSEthan Luis McDonough if (!Handler.hasProfilingGlobals(*this, *Image)) 856fde2d23eSEthan Luis McDonough continue; 857fde2d23eSEthan Luis McDonough 858fde2d23eSEthan Luis McDonough GPUProfGlobals profdata; 859fde2d23eSEthan Luis McDonough auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); 860fde2d23eSEthan Luis McDonough if (!ProfOrErr) 861fde2d23eSEthan Luis McDonough return ProfOrErr.takeError(); 862fde2d23eSEthan Luis McDonough 863fde2d23eSEthan Luis McDonough // TODO: write data to profiling file 864fde2d23eSEthan Luis McDonough ProfOrErr->dump(); 865fde2d23eSEthan Luis McDonough } 866fde2d23eSEthan Luis McDonough 867330d8983SJohannes Doerfert // Delete the memory manager before deinitializing the device. Otherwise, 868330d8983SJohannes Doerfert // we may delete device allocations after the device is deinitialized. 869330d8983SJohannes Doerfert if (MemoryManager) 870330d8983SJohannes Doerfert delete MemoryManager; 871330d8983SJohannes Doerfert MemoryManager = nullptr; 872330d8983SJohannes Doerfert 873f42f57b5SJoseph Huber RecordReplayTy &RecordReplay = Plugin.getRecordReplay(); 874330d8983SJohannes Doerfert if (RecordReplay.isRecordingOrReplaying()) 875330d8983SJohannes Doerfert RecordReplay.deinit(); 876330d8983SJohannes Doerfert 877330d8983SJohannes Doerfert if (RPCServer) 878330d8983SJohannes Doerfert if (auto Err = RPCServer->deinitDevice(*this)) 879330d8983SJohannes Doerfert return Err; 880330d8983SJohannes Doerfert 881330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT 882330d8983SJohannes Doerfert if (ompt::Initialized) { 883330d8983SJohannes Doerfert bool ExpectedStatus = true; 884330d8983SJohannes Doerfert if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false)) 885435aa766SJoseph Huber performOmptCallback(device_finalize, Plugin.getUserId(DeviceId)); 886330d8983SJohannes Doerfert } 887330d8983SJohannes Doerfert #endif 888330d8983SJohannes Doerfert 889330d8983SJohannes Doerfert return deinitImpl(); 890330d8983SJohannes Doerfert } 891330d8983SJohannes Doerfert Expected<DeviceImageTy *> 892330d8983SJohannes Doerfert GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, 893330d8983SJohannes Doerfert const __tgt_device_image *InputTgtImage) { 894330d8983SJohannes Doerfert assert(InputTgtImage && "Expected non-null target image"); 895330d8983SJohannes Doerfert DP("Load data from image " DPxMOD "\n", DPxPTR(InputTgtImage->ImageStart)); 896330d8983SJohannes Doerfert 897330d8983SJohannes Doerfert auto PostJITImageOrErr = Plugin.getJIT().process(*InputTgtImage, *this); 898330d8983SJohannes Doerfert if (!PostJITImageOrErr) { 899330d8983SJohannes Doerfert auto Err = PostJITImageOrErr.takeError(); 900330d8983SJohannes Doerfert REPORT("Failure to jit IR image %p on device %d: %s\n", InputTgtImage, 901330d8983SJohannes Doerfert DeviceId, toString(std::move(Err)).data()); 902330d8983SJohannes Doerfert return nullptr; 903330d8983SJohannes Doerfert } 904330d8983SJohannes Doerfert 905330d8983SJohannes Doerfert // Load the binary and allocate the image object. Use the next available id 906330d8983SJohannes Doerfert // for the image id, which is the number of previously loaded images. 907330d8983SJohannes Doerfert auto ImageOrErr = 908330d8983SJohannes Doerfert loadBinaryImpl(PostJITImageOrErr.get(), LoadedImages.size()); 909330d8983SJohannes Doerfert if (!ImageOrErr) 910330d8983SJohannes Doerfert return ImageOrErr.takeError(); 911330d8983SJohannes Doerfert 912330d8983SJohannes Doerfert DeviceImageTy *Image = *ImageOrErr; 913330d8983SJohannes Doerfert assert(Image != nullptr && "Invalid image"); 914330d8983SJohannes Doerfert if (InputTgtImage != PostJITImageOrErr.get()) 915330d8983SJohannes Doerfert Image->setTgtImageBitcode(InputTgtImage); 916330d8983SJohannes Doerfert 917330d8983SJohannes Doerfert // Add the image to list. 918330d8983SJohannes Doerfert LoadedImages.push_back(Image); 919330d8983SJohannes Doerfert 920330d8983SJohannes Doerfert // Setup the device environment if needed. 921330d8983SJohannes Doerfert if (auto Err = setupDeviceEnvironment(Plugin, *Image)) 922330d8983SJohannes Doerfert return std::move(Err); 923330d8983SJohannes Doerfert 924330d8983SJohannes Doerfert // Setup the global device memory pool if needed. 925f42f57b5SJoseph Huber if (!Plugin.getRecordReplay().isReplaying() && 926f42f57b5SJoseph Huber shouldSetupDeviceMemoryPool()) { 927330d8983SJohannes Doerfert uint64_t HeapSize; 928330d8983SJohannes Doerfert auto SizeOrErr = getDeviceHeapSize(HeapSize); 929330d8983SJohannes Doerfert if (SizeOrErr) { 930330d8983SJohannes Doerfert REPORT("No global device memory pool due to error: %s\n", 931330d8983SJohannes Doerfert toString(std::move(SizeOrErr)).data()); 932330d8983SJohannes Doerfert } else if (auto Err = setupDeviceMemoryPool(Plugin, *Image, HeapSize)) 933330d8983SJohannes Doerfert return std::move(Err); 934330d8983SJohannes Doerfert } 935330d8983SJohannes Doerfert 936330d8983SJohannes Doerfert if (auto Err = setupRPCServer(Plugin, *Image)) 937330d8983SJohannes Doerfert return std::move(Err); 938330d8983SJohannes Doerfert 939330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT 940330d8983SJohannes Doerfert if (ompt::Initialized) { 941330d8983SJohannes Doerfert size_t Bytes = 94208533a3eSJohannes Doerfert utils::getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart); 943330d8983SJohannes Doerfert performOmptCallback( 944435aa766SJoseph Huber device_load, Plugin.getUserId(DeviceId), 945330d8983SJohannes Doerfert /*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr, 946330d8983SJohannes Doerfert /*ImgSize=*/Bytes, /*HostAddr=*/InputTgtImage->ImageStart, 947330d8983SJohannes Doerfert /*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0); 948330d8983SJohannes Doerfert } 949330d8983SJohannes Doerfert #endif 950330d8983SJohannes Doerfert 951330d8983SJohannes Doerfert // Call any global constructors present on the device. 952330d8983SJohannes Doerfert if (auto Err = callGlobalConstructors(Plugin, *Image)) 953330d8983SJohannes Doerfert return std::move(Err); 954330d8983SJohannes Doerfert 955330d8983SJohannes Doerfert // Return the pointer to the table of entries. 956330d8983SJohannes Doerfert return Image; 957330d8983SJohannes Doerfert } 958330d8983SJohannes Doerfert 959330d8983SJohannes Doerfert Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin, 960330d8983SJohannes Doerfert DeviceImageTy &Image) { 961330d8983SJohannes Doerfert // There are some plugins that do not need this step. 962330d8983SJohannes Doerfert if (!shouldSetupDeviceEnvironment()) 963330d8983SJohannes Doerfert return Plugin::success(); 964330d8983SJohannes Doerfert 965330d8983SJohannes Doerfert // Obtain a table mapping host function pointers to device function pointers. 966330d8983SJohannes Doerfert auto CallTablePairOrErr = setupIndirectCallTable(Plugin, *this, Image); 967330d8983SJohannes Doerfert if (!CallTablePairOrErr) 968330d8983SJohannes Doerfert return CallTablePairOrErr.takeError(); 969330d8983SJohannes Doerfert 970330d8983SJohannes Doerfert DeviceEnvironmentTy DeviceEnvironment; 971330d8983SJohannes Doerfert DeviceEnvironment.DeviceDebugKind = OMPX_DebugKind; 972330d8983SJohannes Doerfert DeviceEnvironment.NumDevices = Plugin.getNumDevices(); 973330d8983SJohannes Doerfert // TODO: The device ID used here is not the real device ID used by OpenMP. 974330d8983SJohannes Doerfert DeviceEnvironment.DeviceNum = DeviceId; 975330d8983SJohannes Doerfert DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize; 976330d8983SJohannes Doerfert DeviceEnvironment.ClockFrequency = getClockFrequency(); 977330d8983SJohannes Doerfert DeviceEnvironment.IndirectCallTable = 978330d8983SJohannes Doerfert reinterpret_cast<uintptr_t>(CallTablePairOrErr->first); 979330d8983SJohannes Doerfert DeviceEnvironment.IndirectCallTableSize = CallTablePairOrErr->second; 980330d8983SJohannes Doerfert DeviceEnvironment.HardwareParallelism = getHardwareParallelism(); 981330d8983SJohannes Doerfert 982330d8983SJohannes Doerfert // Create the metainfo of the device environment global. 983330d8983SJohannes Doerfert GlobalTy DevEnvGlobal("__omp_rtl_device_environment", 984330d8983SJohannes Doerfert sizeof(DeviceEnvironmentTy), &DeviceEnvironment); 985330d8983SJohannes Doerfert 986330d8983SJohannes Doerfert // Write device environment values to the device. 987330d8983SJohannes Doerfert GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); 988330d8983SJohannes Doerfert if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) { 989330d8983SJohannes Doerfert DP("Missing symbol %s, continue execution anyway.\n", 990330d8983SJohannes Doerfert DevEnvGlobal.getName().data()); 991330d8983SJohannes Doerfert consumeError(std::move(Err)); 992330d8983SJohannes Doerfert } 993330d8983SJohannes Doerfert return Plugin::success(); 994330d8983SJohannes Doerfert } 995330d8983SJohannes Doerfert 996330d8983SJohannes Doerfert Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin, 997330d8983SJohannes Doerfert DeviceImageTy &Image, 998330d8983SJohannes Doerfert uint64_t PoolSize) { 999330d8983SJohannes Doerfert // Free the old pool, if any. 1000330d8983SJohannes Doerfert if (DeviceMemoryPool.Ptr) { 1001330d8983SJohannes Doerfert if (auto Err = dataDelete(DeviceMemoryPool.Ptr, 1002330d8983SJohannes Doerfert TargetAllocTy::TARGET_ALLOC_DEVICE)) 1003330d8983SJohannes Doerfert return Err; 1004330d8983SJohannes Doerfert } 1005330d8983SJohannes Doerfert 1006330d8983SJohannes Doerfert DeviceMemoryPool.Size = PoolSize; 1007330d8983SJohannes Doerfert auto AllocOrErr = dataAlloc(PoolSize, /*HostPtr=*/nullptr, 1008330d8983SJohannes Doerfert TargetAllocTy::TARGET_ALLOC_DEVICE); 1009330d8983SJohannes Doerfert if (AllocOrErr) { 1010330d8983SJohannes Doerfert DeviceMemoryPool.Ptr = *AllocOrErr; 1011330d8983SJohannes Doerfert } else { 1012330d8983SJohannes Doerfert auto Err = AllocOrErr.takeError(); 1013330d8983SJohannes Doerfert REPORT("Failure to allocate device memory for global memory pool: %s\n", 1014330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1015330d8983SJohannes Doerfert DeviceMemoryPool.Ptr = nullptr; 1016330d8983SJohannes Doerfert DeviceMemoryPool.Size = 0; 1017330d8983SJohannes Doerfert } 1018330d8983SJohannes Doerfert 1019330d8983SJohannes Doerfert // Create the metainfo of the device environment global. 1020330d8983SJohannes Doerfert GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); 1021330d8983SJohannes Doerfert if (!GHandler.isSymbolInImage(*this, Image, 1022330d8983SJohannes Doerfert "__omp_rtl_device_memory_pool_tracker")) { 1023330d8983SJohannes Doerfert DP("Skip the memory pool as there is no tracker symbol in the image."); 1024330d8983SJohannes Doerfert return Error::success(); 1025330d8983SJohannes Doerfert } 1026330d8983SJohannes Doerfert 1027330d8983SJohannes Doerfert GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker", 1028330d8983SJohannes Doerfert sizeof(DeviceMemoryPoolTrackingTy), 1029330d8983SJohannes Doerfert &DeviceMemoryPoolTracking); 1030330d8983SJohannes Doerfert if (auto Err = GHandler.writeGlobalToDevice(*this, Image, TrackerGlobal)) 1031330d8983SJohannes Doerfert return Err; 1032330d8983SJohannes Doerfert 1033330d8983SJohannes Doerfert // Create the metainfo of the device environment global. 1034330d8983SJohannes Doerfert GlobalTy DevEnvGlobal("__omp_rtl_device_memory_pool", 1035330d8983SJohannes Doerfert sizeof(DeviceMemoryPoolTy), &DeviceMemoryPool); 1036330d8983SJohannes Doerfert 1037330d8983SJohannes Doerfert // Write device environment values to the device. 1038330d8983SJohannes Doerfert return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal); 1039330d8983SJohannes Doerfert } 1040330d8983SJohannes Doerfert 1041330d8983SJohannes Doerfert Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin, 1042330d8983SJohannes Doerfert DeviceImageTy &Image) { 1043330d8983SJohannes Doerfert // The plugin either does not need an RPC server or it is unavailible. 1044330d8983SJohannes Doerfert if (!shouldSetupRPCServer()) 1045330d8983SJohannes Doerfert return Plugin::success(); 1046330d8983SJohannes Doerfert 1047330d8983SJohannes Doerfert // Check if this device needs to run an RPC server. 1048330d8983SJohannes Doerfert RPCServerTy &Server = Plugin.getRPCServer(); 1049330d8983SJohannes Doerfert auto UsingOrErr = 1050330d8983SJohannes Doerfert Server.isDeviceUsingRPC(*this, Plugin.getGlobalHandler(), Image); 1051330d8983SJohannes Doerfert if (!UsingOrErr) 1052330d8983SJohannes Doerfert return UsingOrErr.takeError(); 1053330d8983SJohannes Doerfert 1054330d8983SJohannes Doerfert if (!UsingOrErr.get()) 1055330d8983SJohannes Doerfert return Plugin::success(); 1056330d8983SJohannes Doerfert 1057330d8983SJohannes Doerfert if (auto Err = Server.initDevice(*this, Plugin.getGlobalHandler(), Image)) 1058330d8983SJohannes Doerfert return Err; 1059330d8983SJohannes Doerfert 1060134401deSJoseph Huber if (auto Err = Server.startThread()) 1061134401deSJoseph Huber return Err; 1062134401deSJoseph Huber 1063330d8983SJohannes Doerfert RPCServer = &Server; 1064330d8983SJohannes Doerfert DP("Running an RPC server on device %d\n", getDeviceId()); 1065330d8983SJohannes Doerfert return Plugin::success(); 1066330d8983SJohannes Doerfert } 1067330d8983SJohannes Doerfert 1068330d8983SJohannes Doerfert Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr, 1069330d8983SJohannes Doerfert size_t Size, bool ExternallyLocked) { 1070330d8983SJohannes Doerfert // Insert the new entry into the map. 1071330d8983SJohannes Doerfert auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked}); 1072330d8983SJohannes Doerfert if (!Res.second) 1073330d8983SJohannes Doerfert return Plugin::error("Cannot insert locked buffer entry"); 1074330d8983SJohannes Doerfert 1075330d8983SJohannes Doerfert // Check whether the next entry overlaps with the inserted entry. 1076330d8983SJohannes Doerfert auto It = std::next(Res.first); 1077330d8983SJohannes Doerfert if (It == Allocs.end()) 1078330d8983SJohannes Doerfert return Plugin::success(); 1079330d8983SJohannes Doerfert 1080330d8983SJohannes Doerfert const EntryTy *NextEntry = &(*It); 1081330d8983SJohannes Doerfert if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size)) 1082330d8983SJohannes Doerfert return Plugin::error("Partial overlapping not allowed in locked buffers"); 1083330d8983SJohannes Doerfert 1084330d8983SJohannes Doerfert return Plugin::success(); 1085330d8983SJohannes Doerfert } 1086330d8983SJohannes Doerfert 1087330d8983SJohannes Doerfert Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) { 1088330d8983SJohannes Doerfert // Erase the existing entry. Notice this requires an additional map lookup, 1089330d8983SJohannes Doerfert // but this should not be a performance issue. Using iterators would make 1090330d8983SJohannes Doerfert // the code more difficult to read. 1091330d8983SJohannes Doerfert size_t Erased = Allocs.erase({Entry.HstPtr}); 1092330d8983SJohannes Doerfert if (!Erased) 1093330d8983SJohannes Doerfert return Plugin::error("Cannot erase locked buffer entry"); 1094330d8983SJohannes Doerfert return Plugin::success(); 1095330d8983SJohannes Doerfert } 1096330d8983SJohannes Doerfert 1097330d8983SJohannes Doerfert Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry, 1098330d8983SJohannes Doerfert void *HstPtr, size_t Size) { 1099330d8983SJohannes Doerfert if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size)) 1100330d8983SJohannes Doerfert return Plugin::error("Partial overlapping not allowed in locked buffers"); 1101330d8983SJohannes Doerfert 1102330d8983SJohannes Doerfert ++Entry.References; 1103330d8983SJohannes Doerfert return Plugin::success(); 1104330d8983SJohannes Doerfert } 1105330d8983SJohannes Doerfert 1106330d8983SJohannes Doerfert Expected<bool> PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) { 1107330d8983SJohannes Doerfert if (Entry.References == 0) 1108330d8983SJohannes Doerfert return Plugin::error("Invalid number of references"); 1109330d8983SJohannes Doerfert 1110330d8983SJohannes Doerfert // Return whether this was the last user. 1111330d8983SJohannes Doerfert return (--Entry.References == 0); 1112330d8983SJohannes Doerfert } 1113330d8983SJohannes Doerfert 1114330d8983SJohannes Doerfert Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr, 1115330d8983SJohannes Doerfert void *DevAccessiblePtr, 1116330d8983SJohannes Doerfert size_t Size) { 1117330d8983SJohannes Doerfert assert(HstPtr && "Invalid pointer"); 1118330d8983SJohannes Doerfert assert(DevAccessiblePtr && "Invalid pointer"); 1119330d8983SJohannes Doerfert assert(Size && "Invalid size"); 1120330d8983SJohannes Doerfert 1121330d8983SJohannes Doerfert std::lock_guard<std::shared_mutex> Lock(Mutex); 1122330d8983SJohannes Doerfert 1123330d8983SJohannes Doerfert // No pinned allocation should intersect. 1124330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 1125330d8983SJohannes Doerfert if (Entry) 1126330d8983SJohannes Doerfert return Plugin::error("Cannot insert entry due to an existing one"); 1127330d8983SJohannes Doerfert 1128330d8983SJohannes Doerfert // Now insert the new entry. 1129330d8983SJohannes Doerfert return insertEntry(HstPtr, DevAccessiblePtr, Size); 1130330d8983SJohannes Doerfert } 1131330d8983SJohannes Doerfert 1132330d8983SJohannes Doerfert Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) { 1133330d8983SJohannes Doerfert assert(HstPtr && "Invalid pointer"); 1134330d8983SJohannes Doerfert 1135330d8983SJohannes Doerfert std::lock_guard<std::shared_mutex> Lock(Mutex); 1136330d8983SJohannes Doerfert 1137330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 1138330d8983SJohannes Doerfert if (!Entry) 1139330d8983SJohannes Doerfert return Plugin::error("Cannot find locked buffer"); 1140330d8983SJohannes Doerfert 1141330d8983SJohannes Doerfert // The address in the entry should be the same we are unregistering. 1142330d8983SJohannes Doerfert if (Entry->HstPtr != HstPtr) 1143330d8983SJohannes Doerfert return Plugin::error("Unexpected host pointer in locked buffer entry"); 1144330d8983SJohannes Doerfert 1145330d8983SJohannes Doerfert // Unregister from the entry. 1146330d8983SJohannes Doerfert auto LastUseOrErr = unregisterEntryUse(*Entry); 1147330d8983SJohannes Doerfert if (!LastUseOrErr) 1148330d8983SJohannes Doerfert return LastUseOrErr.takeError(); 1149330d8983SJohannes Doerfert 1150330d8983SJohannes Doerfert // There should be no other references to the pinned allocation. 1151330d8983SJohannes Doerfert if (!(*LastUseOrErr)) 1152330d8983SJohannes Doerfert return Plugin::error("The locked buffer is still being used"); 1153330d8983SJohannes Doerfert 1154330d8983SJohannes Doerfert // Erase the entry from the map. 1155330d8983SJohannes Doerfert return eraseEntry(*Entry); 1156330d8983SJohannes Doerfert } 1157330d8983SJohannes Doerfert 1158330d8983SJohannes Doerfert Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr, 1159330d8983SJohannes Doerfert size_t Size) { 1160330d8983SJohannes Doerfert assert(HstPtr && "Invalid pointer"); 1161330d8983SJohannes Doerfert assert(Size && "Invalid size"); 1162330d8983SJohannes Doerfert 1163330d8983SJohannes Doerfert std::lock_guard<std::shared_mutex> Lock(Mutex); 1164330d8983SJohannes Doerfert 1165330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 1166330d8983SJohannes Doerfert 1167330d8983SJohannes Doerfert if (Entry) { 1168330d8983SJohannes Doerfert // An already registered intersecting buffer was found. Register a new use. 1169330d8983SJohannes Doerfert if (auto Err = registerEntryUse(*Entry, HstPtr, Size)) 1170330d8983SJohannes Doerfert return std::move(Err); 1171330d8983SJohannes Doerfert 1172330d8983SJohannes Doerfert // Return the device accessible pointer with the correct offset. 117308533a3eSJohannes Doerfert return utils::advancePtr(Entry->DevAccessiblePtr, 117408533a3eSJohannes Doerfert utils::getPtrDiff(HstPtr, Entry->HstPtr)); 1175330d8983SJohannes Doerfert } 1176330d8983SJohannes Doerfert 1177330d8983SJohannes Doerfert // No intersecting registered allocation found in the map. First, lock the 1178330d8983SJohannes Doerfert // host buffer and retrieve the device accessible pointer. 1179330d8983SJohannes Doerfert auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size); 1180330d8983SJohannes Doerfert if (!DevAccessiblePtrOrErr) 1181330d8983SJohannes Doerfert return DevAccessiblePtrOrErr.takeError(); 1182330d8983SJohannes Doerfert 1183330d8983SJohannes Doerfert // Now insert the new entry into the map. 1184330d8983SJohannes Doerfert if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size)) 1185330d8983SJohannes Doerfert return std::move(Err); 1186330d8983SJohannes Doerfert 1187330d8983SJohannes Doerfert // Return the device accessible pointer. 1188330d8983SJohannes Doerfert return *DevAccessiblePtrOrErr; 1189330d8983SJohannes Doerfert } 1190330d8983SJohannes Doerfert 1191330d8983SJohannes Doerfert Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) { 1192330d8983SJohannes Doerfert assert(HstPtr && "Invalid pointer"); 1193330d8983SJohannes Doerfert 1194330d8983SJohannes Doerfert std::lock_guard<std::shared_mutex> Lock(Mutex); 1195330d8983SJohannes Doerfert 1196330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 1197330d8983SJohannes Doerfert if (!Entry) 1198330d8983SJohannes Doerfert return Plugin::error("Cannot find locked buffer"); 1199330d8983SJohannes Doerfert 1200330d8983SJohannes Doerfert // Unregister from the locked buffer. No need to do anything if there are 1201330d8983SJohannes Doerfert // others using the allocation. 1202330d8983SJohannes Doerfert auto LastUseOrErr = unregisterEntryUse(*Entry); 1203330d8983SJohannes Doerfert if (!LastUseOrErr) 1204330d8983SJohannes Doerfert return LastUseOrErr.takeError(); 1205330d8983SJohannes Doerfert 1206330d8983SJohannes Doerfert // No need to do anything if there are others using the allocation. 1207330d8983SJohannes Doerfert if (!(*LastUseOrErr)) 1208330d8983SJohannes Doerfert return Plugin::success(); 1209330d8983SJohannes Doerfert 1210330d8983SJohannes Doerfert // This was the last user of the allocation. Unlock the original locked buffer 1211330d8983SJohannes Doerfert // if it was locked by the plugin. Do not unlock it if it was locked by an 1212330d8983SJohannes Doerfert // external entity. Unlock the buffer using the host pointer of the entry. 1213330d8983SJohannes Doerfert if (!Entry->ExternallyLocked) 1214330d8983SJohannes Doerfert if (auto Err = Device.dataUnlockImpl(Entry->HstPtr)) 1215330d8983SJohannes Doerfert return Err; 1216330d8983SJohannes Doerfert 1217330d8983SJohannes Doerfert // Erase the entry from the map. 1218330d8983SJohannes Doerfert return eraseEntry(*Entry); 1219330d8983SJohannes Doerfert } 1220330d8983SJohannes Doerfert 1221330d8983SJohannes Doerfert Error PinnedAllocationMapTy::lockMappedHostBuffer(void *HstPtr, size_t Size) { 1222330d8983SJohannes Doerfert assert(HstPtr && "Invalid pointer"); 1223330d8983SJohannes Doerfert assert(Size && "Invalid size"); 1224330d8983SJohannes Doerfert 1225330d8983SJohannes Doerfert std::lock_guard<std::shared_mutex> Lock(Mutex); 1226330d8983SJohannes Doerfert 1227330d8983SJohannes Doerfert // If previously registered, just register a new user on the entry. 1228330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 1229330d8983SJohannes Doerfert if (Entry) 1230330d8983SJohannes Doerfert return registerEntryUse(*Entry, HstPtr, Size); 1231330d8983SJohannes Doerfert 1232330d8983SJohannes Doerfert size_t BaseSize; 1233330d8983SJohannes Doerfert void *BaseHstPtr, *BaseDevAccessiblePtr; 1234330d8983SJohannes Doerfert 1235330d8983SJohannes Doerfert // Check if it was externally pinned by a vendor-specific API. 1236330d8983SJohannes Doerfert auto IsPinnedOrErr = Device.isPinnedPtrImpl(HstPtr, BaseHstPtr, 1237330d8983SJohannes Doerfert BaseDevAccessiblePtr, BaseSize); 1238330d8983SJohannes Doerfert if (!IsPinnedOrErr) 1239330d8983SJohannes Doerfert return IsPinnedOrErr.takeError(); 1240330d8983SJohannes Doerfert 1241330d8983SJohannes Doerfert // If pinned, just insert the entry representing the whole pinned buffer. 1242330d8983SJohannes Doerfert if (*IsPinnedOrErr) 1243330d8983SJohannes Doerfert return insertEntry(BaseHstPtr, BaseDevAccessiblePtr, BaseSize, 1244330d8983SJohannes Doerfert /*Externallylocked=*/true); 1245330d8983SJohannes Doerfert 1246330d8983SJohannes Doerfert // Not externally pinned. Do nothing if locking of mapped buffers is disabled. 1247330d8983SJohannes Doerfert if (!LockMappedBuffers) 1248330d8983SJohannes Doerfert return Plugin::success(); 1249330d8983SJohannes Doerfert 1250330d8983SJohannes Doerfert // Otherwise, lock the buffer and insert the new entry. 1251330d8983SJohannes Doerfert auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size); 1252330d8983SJohannes Doerfert if (!DevAccessiblePtrOrErr) { 1253330d8983SJohannes Doerfert // Errors may be tolerated. 1254330d8983SJohannes Doerfert if (!IgnoreLockMappedFailures) 1255330d8983SJohannes Doerfert return DevAccessiblePtrOrErr.takeError(); 1256330d8983SJohannes Doerfert 1257330d8983SJohannes Doerfert consumeError(DevAccessiblePtrOrErr.takeError()); 1258330d8983SJohannes Doerfert return Plugin::success(); 1259330d8983SJohannes Doerfert } 1260330d8983SJohannes Doerfert 1261330d8983SJohannes Doerfert return insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size); 1262330d8983SJohannes Doerfert } 1263330d8983SJohannes Doerfert 1264330d8983SJohannes Doerfert Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) { 1265330d8983SJohannes Doerfert assert(HstPtr && "Invalid pointer"); 1266330d8983SJohannes Doerfert 1267330d8983SJohannes Doerfert std::lock_guard<std::shared_mutex> Lock(Mutex); 1268330d8983SJohannes Doerfert 1269330d8983SJohannes Doerfert // Check whether there is any intersecting entry. 1270330d8983SJohannes Doerfert const EntryTy *Entry = findIntersecting(HstPtr); 1271330d8983SJohannes Doerfert 1272330d8983SJohannes Doerfert // No entry but automatic locking of mapped buffers is disabled, so 1273330d8983SJohannes Doerfert // nothing to do. 1274330d8983SJohannes Doerfert if (!Entry && !LockMappedBuffers) 1275330d8983SJohannes Doerfert return Plugin::success(); 1276330d8983SJohannes Doerfert 1277330d8983SJohannes Doerfert // No entry, automatic locking is enabled, but the locking may have failed, so 1278330d8983SJohannes Doerfert // do nothing. 1279330d8983SJohannes Doerfert if (!Entry && IgnoreLockMappedFailures) 1280330d8983SJohannes Doerfert return Plugin::success(); 1281330d8983SJohannes Doerfert 1282330d8983SJohannes Doerfert // No entry, but the automatic locking is enabled, so this is an error. 1283330d8983SJohannes Doerfert if (!Entry) 1284330d8983SJohannes Doerfert return Plugin::error("Locked buffer not found"); 1285330d8983SJohannes Doerfert 1286330d8983SJohannes Doerfert // There is entry, so unregister a user and check whether it was the last one. 1287330d8983SJohannes Doerfert auto LastUseOrErr = unregisterEntryUse(*Entry); 1288330d8983SJohannes Doerfert if (!LastUseOrErr) 1289330d8983SJohannes Doerfert return LastUseOrErr.takeError(); 1290330d8983SJohannes Doerfert 1291330d8983SJohannes Doerfert // If it is not the last one, there is nothing to do. 1292330d8983SJohannes Doerfert if (!(*LastUseOrErr)) 1293330d8983SJohannes Doerfert return Plugin::success(); 1294330d8983SJohannes Doerfert 1295330d8983SJohannes Doerfert // Otherwise, if it was the last and the buffer was locked by the plugin, 1296330d8983SJohannes Doerfert // unlock it. 1297330d8983SJohannes Doerfert if (!Entry->ExternallyLocked) 1298330d8983SJohannes Doerfert if (auto Err = Device.dataUnlockImpl(Entry->HstPtr)) 1299330d8983SJohannes Doerfert return Err; 1300330d8983SJohannes Doerfert 1301330d8983SJohannes Doerfert // Finally erase the entry from the map. 1302330d8983SJohannes Doerfert return eraseEntry(*Entry); 1303330d8983SJohannes Doerfert } 1304330d8983SJohannes Doerfert 1305330d8983SJohannes Doerfert Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) { 1306330d8983SJohannes Doerfert if (!AsyncInfo || !AsyncInfo->Queue) 1307330d8983SJohannes Doerfert return Plugin::error("Invalid async info queue"); 1308330d8983SJohannes Doerfert 1309330d8983SJohannes Doerfert if (auto Err = synchronizeImpl(*AsyncInfo)) 1310330d8983SJohannes Doerfert return Err; 1311330d8983SJohannes Doerfert 1312330d8983SJohannes Doerfert for (auto *Ptr : AsyncInfo->AssociatedAllocations) 1313330d8983SJohannes Doerfert if (auto Err = dataDelete(Ptr, TargetAllocTy::TARGET_ALLOC_DEVICE)) 1314330d8983SJohannes Doerfert return Err; 1315330d8983SJohannes Doerfert AsyncInfo->AssociatedAllocations.clear(); 1316330d8983SJohannes Doerfert 1317330d8983SJohannes Doerfert return Plugin::success(); 1318330d8983SJohannes Doerfert } 1319330d8983SJohannes Doerfert 1320330d8983SJohannes Doerfert Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) { 1321330d8983SJohannes Doerfert if (!AsyncInfo || !AsyncInfo->Queue) 1322330d8983SJohannes Doerfert return Plugin::error("Invalid async info queue"); 1323330d8983SJohannes Doerfert 1324330d8983SJohannes Doerfert return queryAsyncImpl(*AsyncInfo); 1325330d8983SJohannes Doerfert } 1326330d8983SJohannes Doerfert 1327330d8983SJohannes Doerfert Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) { 1328330d8983SJohannes Doerfert return Plugin::error("Device does not suppport VA Management"); 1329330d8983SJohannes Doerfert } 1330330d8983SJohannes Doerfert 1331330d8983SJohannes Doerfert Error GenericDeviceTy::memoryVAUnMap(void *VAddr, size_t Size) { 1332330d8983SJohannes Doerfert return Plugin::error("Device does not suppport VA Management"); 1333330d8983SJohannes Doerfert } 1334330d8983SJohannes Doerfert 1335330d8983SJohannes Doerfert Error GenericDeviceTy::getDeviceMemorySize(uint64_t &DSize) { 1336330d8983SJohannes Doerfert return Plugin::error( 1337330d8983SJohannes Doerfert "Mising getDeviceMemorySize impelmentation (required by RR-heuristic"); 1338330d8983SJohannes Doerfert } 1339330d8983SJohannes Doerfert 1340330d8983SJohannes Doerfert Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, 1341330d8983SJohannes Doerfert TargetAllocTy Kind) { 1342330d8983SJohannes Doerfert void *Alloc = nullptr; 1343330d8983SJohannes Doerfert 1344f42f57b5SJoseph Huber if (Plugin.getRecordReplay().isRecordingOrReplaying()) 1345f42f57b5SJoseph Huber return Plugin.getRecordReplay().alloc(Size); 1346330d8983SJohannes Doerfert 1347330d8983SJohannes Doerfert switch (Kind) { 1348330d8983SJohannes Doerfert case TARGET_ALLOC_DEFAULT: 1349330d8983SJohannes Doerfert case TARGET_ALLOC_DEVICE_NON_BLOCKING: 1350330d8983SJohannes Doerfert case TARGET_ALLOC_DEVICE: 1351330d8983SJohannes Doerfert if (MemoryManager) { 1352330d8983SJohannes Doerfert Alloc = MemoryManager->allocate(Size, HostPtr); 1353330d8983SJohannes Doerfert if (!Alloc) 1354330d8983SJohannes Doerfert return Plugin::error("Failed to allocate from memory manager"); 1355330d8983SJohannes Doerfert break; 1356330d8983SJohannes Doerfert } 1357330d8983SJohannes Doerfert [[fallthrough]]; 1358330d8983SJohannes Doerfert case TARGET_ALLOC_HOST: 1359330d8983SJohannes Doerfert case TARGET_ALLOC_SHARED: 1360330d8983SJohannes Doerfert Alloc = allocate(Size, HostPtr, Kind); 1361330d8983SJohannes Doerfert if (!Alloc) 1362330d8983SJohannes Doerfert return Plugin::error("Failed to allocate from device allocator"); 1363330d8983SJohannes Doerfert } 1364330d8983SJohannes Doerfert 1365330d8983SJohannes Doerfert // Report error if the memory manager or the device allocator did not return 1366330d8983SJohannes Doerfert // any memory buffer. 1367330d8983SJohannes Doerfert if (!Alloc) 1368330d8983SJohannes Doerfert return Plugin::error("Invalid target data allocation kind or requested " 1369330d8983SJohannes Doerfert "allocator not implemented yet"); 1370330d8983SJohannes Doerfert 1371330d8983SJohannes Doerfert // Register allocated buffer as pinned memory if the type is host memory. 1372330d8983SJohannes Doerfert if (Kind == TARGET_ALLOC_HOST) 1373330d8983SJohannes Doerfert if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size)) 1374330d8983SJohannes Doerfert return std::move(Err); 1375330d8983SJohannes Doerfert 1376c95abe94SJohannes Doerfert // Keep track of the allocation stack if we track allocation traces. 1377c95abe94SJohannes Doerfert if (OMPX_TrackAllocationTraces) { 1378c95abe94SJohannes Doerfert std::string StackTrace; 1379c95abe94SJohannes Doerfert llvm::raw_string_ostream OS(StackTrace); 1380c95abe94SJohannes Doerfert llvm::sys::PrintStackTrace(OS); 1381c95abe94SJohannes Doerfert 1382c95abe94SJohannes Doerfert AllocationTraceInfoTy *ATI = new AllocationTraceInfoTy(); 1383c95abe94SJohannes Doerfert ATI->AllocationTrace = std::move(StackTrace); 1384c95abe94SJohannes Doerfert ATI->DevicePtr = Alloc; 1385c95abe94SJohannes Doerfert ATI->HostPtr = HostPtr; 1386c95abe94SJohannes Doerfert ATI->Size = Size; 1387c95abe94SJohannes Doerfert ATI->Kind = Kind; 1388c95abe94SJohannes Doerfert 1389c95abe94SJohannes Doerfert auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor(); 1390c95abe94SJohannes Doerfert auto *&MapATI = (*AllocationTraceMap)[Alloc]; 1391c95abe94SJohannes Doerfert ATI->LastAllocationInfo = MapATI; 1392c95abe94SJohannes Doerfert MapATI = ATI; 1393c95abe94SJohannes Doerfert } 1394c95abe94SJohannes Doerfert 1395330d8983SJohannes Doerfert return Alloc; 1396330d8983SJohannes Doerfert } 1397330d8983SJohannes Doerfert 1398330d8983SJohannes Doerfert Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) { 1399330d8983SJohannes Doerfert // Free is a noop when recording or replaying. 1400f42f57b5SJoseph Huber if (Plugin.getRecordReplay().isRecordingOrReplaying()) 1401330d8983SJohannes Doerfert return Plugin::success(); 1402330d8983SJohannes Doerfert 1403c95abe94SJohannes Doerfert // Keep track of the deallocation stack if we track allocation traces. 1404c95abe94SJohannes Doerfert if (OMPX_TrackAllocationTraces) { 1405c95abe94SJohannes Doerfert AllocationTraceInfoTy *ATI = nullptr; 1406c95abe94SJohannes Doerfert { 1407c95abe94SJohannes Doerfert auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor(); 1408c95abe94SJohannes Doerfert ATI = (*AllocationTraceMap)[TgtPtr]; 1409c95abe94SJohannes Doerfert } 1410c95abe94SJohannes Doerfert 1411c95abe94SJohannes Doerfert std::string StackTrace; 1412c95abe94SJohannes Doerfert llvm::raw_string_ostream OS(StackTrace); 1413c95abe94SJohannes Doerfert llvm::sys::PrintStackTrace(OS); 1414c95abe94SJohannes Doerfert 1415c95abe94SJohannes Doerfert if (!ATI) 1416c95abe94SJohannes Doerfert ErrorReporter::reportDeallocationOfNonAllocatedPtr(TgtPtr, Kind, ATI, 1417c95abe94SJohannes Doerfert StackTrace); 1418c95abe94SJohannes Doerfert 1419c95abe94SJohannes Doerfert // ATI is not null, thus we can lock it to inspect and modify it further. 1420c95abe94SJohannes Doerfert std::lock_guard<std::mutex> LG(ATI->Lock); 1421c95abe94SJohannes Doerfert if (!ATI->DeallocationTrace.empty()) 1422c95abe94SJohannes Doerfert ErrorReporter::reportDeallocationOfDeallocatedPtr(TgtPtr, Kind, ATI, 1423c95abe94SJohannes Doerfert StackTrace); 1424c95abe94SJohannes Doerfert 1425c95abe94SJohannes Doerfert if (ATI->Kind != Kind) 1426c95abe94SJohannes Doerfert ErrorReporter::reportDeallocationOfWrongPtrKind(TgtPtr, Kind, ATI, 1427c95abe94SJohannes Doerfert StackTrace); 1428c95abe94SJohannes Doerfert 1429c95abe94SJohannes Doerfert ATI->DeallocationTrace = StackTrace; 1430c95abe94SJohannes Doerfert 1431c95abe94SJohannes Doerfert #undef DEALLOCATION_ERROR 1432c95abe94SJohannes Doerfert } 1433c95abe94SJohannes Doerfert 1434330d8983SJohannes Doerfert int Res; 1435b438a817SJhonatan Cléto switch (Kind) { 1436b438a817SJhonatan Cléto case TARGET_ALLOC_DEFAULT: 1437b438a817SJhonatan Cléto case TARGET_ALLOC_DEVICE_NON_BLOCKING: 1438b438a817SJhonatan Cléto case TARGET_ALLOC_DEVICE: 1439b438a817SJhonatan Cléto if (MemoryManager) { 1440330d8983SJohannes Doerfert Res = MemoryManager->free(TgtPtr); 1441330d8983SJohannes Doerfert if (Res) 1442b438a817SJhonatan Cléto return Plugin::error( 1443b438a817SJhonatan Cléto "Failure to deallocate device pointer %p via memory manager", 1444b438a817SJhonatan Cléto TgtPtr); 1445b438a817SJhonatan Cléto break; 1446b438a817SJhonatan Cléto } 1447b438a817SJhonatan Cléto [[fallthrough]]; 1448b438a817SJhonatan Cléto case TARGET_ALLOC_HOST: 1449b438a817SJhonatan Cléto case TARGET_ALLOC_SHARED: 1450b438a817SJhonatan Cléto Res = free(TgtPtr, Kind); 1451b438a817SJhonatan Cléto if (Res) 1452b438a817SJhonatan Cléto return Plugin::error( 1453b438a817SJhonatan Cléto "Failure to deallocate device pointer %p via device deallocator", 1454b438a817SJhonatan Cléto TgtPtr); 1455b438a817SJhonatan Cléto } 1456330d8983SJohannes Doerfert 1457330d8983SJohannes Doerfert // Unregister deallocated pinned memory buffer if the type is host memory. 1458330d8983SJohannes Doerfert if (Kind == TARGET_ALLOC_HOST) 1459330d8983SJohannes Doerfert if (auto Err = PinnedAllocs.unregisterHostBuffer(TgtPtr)) 1460330d8983SJohannes Doerfert return Err; 1461330d8983SJohannes Doerfert 1462330d8983SJohannes Doerfert return Plugin::success(); 1463330d8983SJohannes Doerfert } 1464330d8983SJohannes Doerfert 1465330d8983SJohannes Doerfert Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr, 1466330d8983SJohannes Doerfert int64_t Size, __tgt_async_info *AsyncInfo) { 1467330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); 1468330d8983SJohannes Doerfert 1469330d8983SJohannes Doerfert auto Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper); 1470330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1471330d8983SJohannes Doerfert return Err; 1472330d8983SJohannes Doerfert } 1473330d8983SJohannes Doerfert 1474330d8983SJohannes Doerfert Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr, 1475330d8983SJohannes Doerfert int64_t Size, __tgt_async_info *AsyncInfo) { 1476330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); 1477330d8983SJohannes Doerfert 1478330d8983SJohannes Doerfert auto Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper); 1479330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1480330d8983SJohannes Doerfert return Err; 1481330d8983SJohannes Doerfert } 1482330d8983SJohannes Doerfert 1483330d8983SJohannes Doerfert Error GenericDeviceTy::dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev, 1484330d8983SJohannes Doerfert void *DstPtr, int64_t Size, 1485330d8983SJohannes Doerfert __tgt_async_info *AsyncInfo) { 1486330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); 1487330d8983SJohannes Doerfert 1488330d8983SJohannes Doerfert auto Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper); 1489330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1490330d8983SJohannes Doerfert return Err; 1491330d8983SJohannes Doerfert } 1492330d8983SJohannes Doerfert 1493330d8983SJohannes Doerfert Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs, 1494330d8983SJohannes Doerfert ptrdiff_t *ArgOffsets, 1495330d8983SJohannes Doerfert KernelArgsTy &KernelArgs, 1496330d8983SJohannes Doerfert __tgt_async_info *AsyncInfo) { 1497330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper( 1498f42f57b5SJoseph Huber *this, 1499f42f57b5SJoseph Huber Plugin.getRecordReplay().isRecordingOrReplaying() ? nullptr : AsyncInfo); 1500330d8983SJohannes Doerfert 1501330d8983SJohannes Doerfert GenericKernelTy &GenericKernel = 1502330d8983SJohannes Doerfert *reinterpret_cast<GenericKernelTy *>(EntryPtr); 1503330d8983SJohannes Doerfert 15049a101322SJohannes Doerfert { 15059a101322SJohannes Doerfert std::string StackTrace; 15069a101322SJohannes Doerfert if (OMPX_TrackNumKernelLaunches) { 15079a101322SJohannes Doerfert llvm::raw_string_ostream OS(StackTrace); 15089a101322SJohannes Doerfert llvm::sys::PrintStackTrace(OS); 15099a101322SJohannes Doerfert } 15109a101322SJohannes Doerfert 15119a101322SJohannes Doerfert auto KernelTraceInfoRecord = KernelLaunchTraces.getExclusiveAccessor(); 15129a101322SJohannes Doerfert (*KernelTraceInfoRecord) 15139a101322SJohannes Doerfert .emplace(&GenericKernel, std::move(StackTrace), AsyncInfo); 15149a101322SJohannes Doerfert } 15159a101322SJohannes Doerfert 1516330d8983SJohannes Doerfert auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs, 1517330d8983SJohannes Doerfert AsyncInfoWrapper); 1518330d8983SJohannes Doerfert 1519330d8983SJohannes Doerfert // 'finalize' here to guarantee next record-replay actions are in-sync 1520330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1521330d8983SJohannes Doerfert 1522f42f57b5SJoseph Huber RecordReplayTy &RecordReplay = Plugin.getRecordReplay(); 1523330d8983SJohannes Doerfert if (RecordReplay.isRecordingOrReplaying() && 1524330d8983SJohannes Doerfert RecordReplay.isSaveOutputEnabled()) 1525330d8983SJohannes Doerfert RecordReplay.saveKernelOutputInfo(GenericKernel.getName()); 1526330d8983SJohannes Doerfert 1527330d8983SJohannes Doerfert return Err; 1528330d8983SJohannes Doerfert } 1529330d8983SJohannes Doerfert 1530330d8983SJohannes Doerfert Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) { 1531330d8983SJohannes Doerfert assert(AsyncInfoPtr && "Invalid async info"); 1532330d8983SJohannes Doerfert 1533330d8983SJohannes Doerfert *AsyncInfoPtr = new __tgt_async_info(); 1534330d8983SJohannes Doerfert 1535330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper(*this, *AsyncInfoPtr); 1536330d8983SJohannes Doerfert 1537330d8983SJohannes Doerfert auto Err = initAsyncInfoImpl(AsyncInfoWrapper); 1538330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1539330d8983SJohannes Doerfert return Err; 1540330d8983SJohannes Doerfert } 1541330d8983SJohannes Doerfert 1542330d8983SJohannes Doerfert Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) { 1543330d8983SJohannes Doerfert assert(DeviceInfo && "Invalid device info"); 1544330d8983SJohannes Doerfert 1545330d8983SJohannes Doerfert return initDeviceInfoImpl(DeviceInfo); 1546330d8983SJohannes Doerfert } 1547330d8983SJohannes Doerfert 1548330d8983SJohannes Doerfert Error GenericDeviceTy::printInfo() { 1549330d8983SJohannes Doerfert InfoQueueTy InfoQueue; 1550330d8983SJohannes Doerfert 1551330d8983SJohannes Doerfert // Get the vendor-specific info entries describing the device properties. 1552330d8983SJohannes Doerfert if (auto Err = obtainInfoImpl(InfoQueue)) 1553330d8983SJohannes Doerfert return Err; 1554330d8983SJohannes Doerfert 1555330d8983SJohannes Doerfert // Print all info entries. 1556330d8983SJohannes Doerfert InfoQueue.print(); 1557330d8983SJohannes Doerfert 1558330d8983SJohannes Doerfert return Plugin::success(); 1559330d8983SJohannes Doerfert } 1560330d8983SJohannes Doerfert 1561330d8983SJohannes Doerfert Error GenericDeviceTy::createEvent(void **EventPtrStorage) { 1562330d8983SJohannes Doerfert return createEventImpl(EventPtrStorage); 1563330d8983SJohannes Doerfert } 1564330d8983SJohannes Doerfert 1565330d8983SJohannes Doerfert Error GenericDeviceTy::destroyEvent(void *EventPtr) { 1566330d8983SJohannes Doerfert return destroyEventImpl(EventPtr); 1567330d8983SJohannes Doerfert } 1568330d8983SJohannes Doerfert 1569330d8983SJohannes Doerfert Error GenericDeviceTy::recordEvent(void *EventPtr, 1570330d8983SJohannes Doerfert __tgt_async_info *AsyncInfo) { 1571330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); 1572330d8983SJohannes Doerfert 1573330d8983SJohannes Doerfert auto Err = recordEventImpl(EventPtr, AsyncInfoWrapper); 1574330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1575330d8983SJohannes Doerfert return Err; 1576330d8983SJohannes Doerfert } 1577330d8983SJohannes Doerfert 1578330d8983SJohannes Doerfert Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) { 1579330d8983SJohannes Doerfert AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); 1580330d8983SJohannes Doerfert 1581330d8983SJohannes Doerfert auto Err = waitEventImpl(EventPtr, AsyncInfoWrapper); 1582330d8983SJohannes Doerfert AsyncInfoWrapper.finalize(Err); 1583330d8983SJohannes Doerfert return Err; 1584330d8983SJohannes Doerfert } 1585330d8983SJohannes Doerfert 1586330d8983SJohannes Doerfert Error GenericDeviceTy::syncEvent(void *EventPtr) { 1587330d8983SJohannes Doerfert return syncEventImpl(EventPtr); 1588330d8983SJohannes Doerfert } 1589330d8983SJohannes Doerfert 1590330d8983SJohannes Doerfert bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); } 1591330d8983SJohannes Doerfert 1592330d8983SJohannes Doerfert Error GenericPluginTy::init() { 1593435aa766SJoseph Huber if (Initialized) 1594435aa766SJoseph Huber return Plugin::success(); 1595435aa766SJoseph Huber 1596330d8983SJohannes Doerfert auto NumDevicesOrErr = initImpl(); 1597330d8983SJohannes Doerfert if (!NumDevicesOrErr) 1598330d8983SJohannes Doerfert return NumDevicesOrErr.takeError(); 159921f3a609SJoseph Huber Initialized = true; 1600435aa766SJoseph Huber 1601330d8983SJohannes Doerfert NumDevices = *NumDevicesOrErr; 1602330d8983SJohannes Doerfert if (NumDevices == 0) 1603330d8983SJohannes Doerfert return Plugin::success(); 1604330d8983SJohannes Doerfert 1605330d8983SJohannes Doerfert assert(Devices.size() == 0 && "Plugin already initialized"); 1606330d8983SJohannes Doerfert Devices.resize(NumDevices, nullptr); 1607330d8983SJohannes Doerfert 1608330d8983SJohannes Doerfert GlobalHandler = createGlobalHandler(); 1609330d8983SJohannes Doerfert assert(GlobalHandler && "Invalid global handler"); 1610330d8983SJohannes Doerfert 1611330d8983SJohannes Doerfert RPCServer = new RPCServerTy(*this); 1612330d8983SJohannes Doerfert assert(RPCServer && "Invalid RPC server"); 1613330d8983SJohannes Doerfert 1614f42f57b5SJoseph Huber RecordReplay = new RecordReplayTy(); 1615f42f57b5SJoseph Huber assert(RecordReplay && "Invalid RR interface"); 1616f42f57b5SJoseph Huber 1617330d8983SJohannes Doerfert return Plugin::success(); 1618330d8983SJohannes Doerfert } 1619330d8983SJohannes Doerfert 1620330d8983SJohannes Doerfert Error GenericPluginTy::deinit() { 1621435aa766SJoseph Huber assert(Initialized && "Plugin was not initialized!"); 1622435aa766SJoseph Huber 1623330d8983SJohannes Doerfert // Deinitialize all active devices. 1624330d8983SJohannes Doerfert for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) { 1625330d8983SJohannes Doerfert if (Devices[DeviceId]) { 1626330d8983SJohannes Doerfert if (auto Err = deinitDevice(DeviceId)) 1627330d8983SJohannes Doerfert return Err; 1628330d8983SJohannes Doerfert } 1629330d8983SJohannes Doerfert assert(!Devices[DeviceId] && "Device was not deinitialized"); 1630330d8983SJohannes Doerfert } 1631330d8983SJohannes Doerfert 1632330d8983SJohannes Doerfert // There is no global handler if no device is available. 1633330d8983SJohannes Doerfert if (GlobalHandler) 1634330d8983SJohannes Doerfert delete GlobalHandler; 1635330d8983SJohannes Doerfert 1636*38b3f45aSJoseph Huber if (RPCServer && RPCServer->Thread->Running.load(std::memory_order_relaxed)) 1637134401deSJoseph Huber if (Error Err = RPCServer->shutDown()) 1638134401deSJoseph Huber return Err; 1639f0750584SJoseph Huber 1640f0750584SJoseph Huber if (RPCServer) 1641330d8983SJohannes Doerfert delete RPCServer; 1642330d8983SJohannes Doerfert 1643f42f57b5SJoseph Huber if (RecordReplay) 1644f42f57b5SJoseph Huber delete RecordReplay; 1645f42f57b5SJoseph Huber 1646330d8983SJohannes Doerfert // Perform last deinitializations on the plugin. 1647435aa766SJoseph Huber if (Error Err = deinitImpl()) 1648435aa766SJoseph Huber return Err; 1649435aa766SJoseph Huber Initialized = false; 1650435aa766SJoseph Huber 1651435aa766SJoseph Huber return Plugin::success(); 1652330d8983SJohannes Doerfert } 1653330d8983SJohannes Doerfert 1654330d8983SJohannes Doerfert Error GenericPluginTy::initDevice(int32_t DeviceId) { 1655330d8983SJohannes Doerfert assert(!Devices[DeviceId] && "Device already initialized"); 1656330d8983SJohannes Doerfert 1657330d8983SJohannes Doerfert // Create the device and save the reference. 1658330d8983SJohannes Doerfert GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices); 1659330d8983SJohannes Doerfert assert(Device && "Invalid device"); 1660330d8983SJohannes Doerfert 1661330d8983SJohannes Doerfert // Save the device reference into the list. 1662330d8983SJohannes Doerfert Devices[DeviceId] = Device; 1663330d8983SJohannes Doerfert 1664330d8983SJohannes Doerfert // Initialize the device and its resources. 1665330d8983SJohannes Doerfert return Device->init(*this); 1666330d8983SJohannes Doerfert } 1667330d8983SJohannes Doerfert 1668330d8983SJohannes Doerfert Error GenericPluginTy::deinitDevice(int32_t DeviceId) { 1669330d8983SJohannes Doerfert // The device may be already deinitialized. 1670330d8983SJohannes Doerfert if (Devices[DeviceId] == nullptr) 1671330d8983SJohannes Doerfert return Plugin::success(); 1672330d8983SJohannes Doerfert 1673330d8983SJohannes Doerfert // Deinitialize the device and release its resources. 1674330d8983SJohannes Doerfert if (auto Err = Devices[DeviceId]->deinit(*this)) 1675330d8983SJohannes Doerfert return Err; 1676330d8983SJohannes Doerfert 1677330d8983SJohannes Doerfert // Delete the device and invalidate its reference. 1678330d8983SJohannes Doerfert delete Devices[DeviceId]; 1679330d8983SJohannes Doerfert Devices[DeviceId] = nullptr; 1680330d8983SJohannes Doerfert 1681330d8983SJohannes Doerfert return Plugin::success(); 1682330d8983SJohannes Doerfert } 1683330d8983SJohannes Doerfert 1684330d8983SJohannes Doerfert Expected<bool> GenericPluginTy::checkELFImage(StringRef Image) const { 1685330d8983SJohannes Doerfert // First check if this image is a regular ELF file. 1686330d8983SJohannes Doerfert if (!utils::elf::isELF(Image)) 1687330d8983SJohannes Doerfert return false; 1688330d8983SJohannes Doerfert 1689330d8983SJohannes Doerfert // Check if this image is an ELF with a matching machine value. 1690330d8983SJohannes Doerfert auto MachineOrErr = utils::elf::checkMachine(Image, getMagicElfBits()); 1691330d8983SJohannes Doerfert if (!MachineOrErr) 1692330d8983SJohannes Doerfert return MachineOrErr.takeError(); 1693330d8983SJohannes Doerfert 169421f3a609SJoseph Huber return MachineOrErr; 1695330d8983SJohannes Doerfert } 1696330d8983SJohannes Doerfert 169721f3a609SJoseph Huber Expected<bool> GenericPluginTy::checkBitcodeImage(StringRef Image) const { 169821f3a609SJoseph Huber if (identify_magic(Image) != file_magic::bitcode) 169921f3a609SJoseph Huber return false; 170021f3a609SJoseph Huber 170121f3a609SJoseph Huber LLVMContext Context; 170221f3a609SJoseph Huber auto ModuleOrErr = getLazyBitcodeModule(MemoryBufferRef(Image, ""), Context, 170321f3a609SJoseph Huber /*ShouldLazyLoadMetadata=*/true); 170421f3a609SJoseph Huber if (!ModuleOrErr) 170521f3a609SJoseph Huber return ModuleOrErr.takeError(); 170621f3a609SJoseph Huber Module &M = **ModuleOrErr; 170721f3a609SJoseph Huber 170821f3a609SJoseph Huber return Triple(M.getTargetTriple()).getArch() == getTripleArch(); 170921f3a609SJoseph Huber } 171021f3a609SJoseph Huber 171121f3a609SJoseph Huber int32_t GenericPluginTy::is_initialized() const { return Initialized; } 171221f3a609SJoseph Huber 1713435aa766SJoseph Huber int32_t GenericPluginTy::is_plugin_compatible(__tgt_device_image *Image) { 1714330d8983SJohannes Doerfert StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart), 171508533a3eSJohannes Doerfert utils::getPtrDiff(Image->ImageEnd, Image->ImageStart)); 1716330d8983SJohannes Doerfert 1717330d8983SJohannes Doerfert auto HandleError = [&](Error Err) -> bool { 1718330d8983SJohannes Doerfert [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); 1719330d8983SJohannes Doerfert DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str()); 1720330d8983SJohannes Doerfert return false; 1721330d8983SJohannes Doerfert }; 1722330d8983SJohannes Doerfert switch (identify_magic(Buffer)) { 1723330d8983SJohannes Doerfert case file_magic::elf: 1724330d8983SJohannes Doerfert case file_magic::elf_relocatable: 1725330d8983SJohannes Doerfert case file_magic::elf_executable: 1726330d8983SJohannes Doerfert case file_magic::elf_shared_object: 1727330d8983SJohannes Doerfert case file_magic::elf_core: { 1728330d8983SJohannes Doerfert auto MatchOrErr = checkELFImage(Buffer); 1729330d8983SJohannes Doerfert if (Error Err = MatchOrErr.takeError()) 1730330d8983SJohannes Doerfert return HandleError(std::move(Err)); 1731330d8983SJohannes Doerfert return *MatchOrErr; 1732435aa766SJoseph Huber } 1733435aa766SJoseph Huber case file_magic::bitcode: { 1734435aa766SJoseph Huber auto MatchOrErr = checkBitcodeImage(Buffer); 1735435aa766SJoseph Huber if (Error Err = MatchOrErr.takeError()) 1736435aa766SJoseph Huber return HandleError(std::move(Err)); 1737435aa766SJoseph Huber return *MatchOrErr; 1738435aa766SJoseph Huber } 1739435aa766SJoseph Huber default: 1740435aa766SJoseph Huber return false; 1741435aa766SJoseph Huber } 1742435aa766SJoseph Huber } 1743435aa766SJoseph Huber 1744435aa766SJoseph Huber int32_t GenericPluginTy::is_device_compatible(int32_t DeviceId, 1745435aa766SJoseph Huber __tgt_device_image *Image) { 1746435aa766SJoseph Huber StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart), 174708533a3eSJohannes Doerfert utils::getPtrDiff(Image->ImageEnd, Image->ImageStart)); 1748435aa766SJoseph Huber 1749435aa766SJoseph Huber auto HandleError = [&](Error Err) -> bool { 1750435aa766SJoseph Huber [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); 1751435aa766SJoseph Huber DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str()); 1752435aa766SJoseph Huber return false; 1753435aa766SJoseph Huber }; 1754435aa766SJoseph Huber switch (identify_magic(Buffer)) { 1755435aa766SJoseph Huber case file_magic::elf: 1756435aa766SJoseph Huber case file_magic::elf_relocatable: 1757435aa766SJoseph Huber case file_magic::elf_executable: 1758435aa766SJoseph Huber case file_magic::elf_shared_object: 1759435aa766SJoseph Huber case file_magic::elf_core: { 1760435aa766SJoseph Huber auto MatchOrErr = checkELFImage(Buffer); 1761435aa766SJoseph Huber if (Error Err = MatchOrErr.takeError()) 1762435aa766SJoseph Huber return HandleError(std::move(Err)); 1763435aa766SJoseph Huber if (!*MatchOrErr) 1764435aa766SJoseph Huber return false; 176521f3a609SJoseph Huber 176621f3a609SJoseph Huber // Perform plugin-dependent checks for the specific architecture if needed. 1767435aa766SJoseph Huber auto CompatibleOrErr = isELFCompatible(DeviceId, Buffer); 176821f3a609SJoseph Huber if (Error Err = CompatibleOrErr.takeError()) 176921f3a609SJoseph Huber return HandleError(std::move(Err)); 177021f3a609SJoseph Huber return *CompatibleOrErr; 1771330d8983SJohannes Doerfert } 1772330d8983SJohannes Doerfert case file_magic::bitcode: { 177321f3a609SJoseph Huber auto MatchOrErr = checkBitcodeImage(Buffer); 1774330d8983SJohannes Doerfert if (Error Err = MatchOrErr.takeError()) 1775330d8983SJohannes Doerfert return HandleError(std::move(Err)); 1776330d8983SJohannes Doerfert return *MatchOrErr; 1777330d8983SJohannes Doerfert } 1778330d8983SJohannes Doerfert default: 1779330d8983SJohannes Doerfert return false; 1780330d8983SJohannes Doerfert } 1781330d8983SJohannes Doerfert } 1782330d8983SJohannes Doerfert 1783435aa766SJoseph Huber int32_t GenericPluginTy::is_device_initialized(int32_t DeviceId) const { 1784435aa766SJoseph Huber return isValidDeviceId(DeviceId) && Devices[DeviceId] != nullptr; 1785435aa766SJoseph Huber } 1786435aa766SJoseph Huber 1787330d8983SJohannes Doerfert int32_t GenericPluginTy::init_device(int32_t DeviceId) { 1788330d8983SJohannes Doerfert auto Err = initDevice(DeviceId); 1789330d8983SJohannes Doerfert if (Err) { 1790330d8983SJohannes Doerfert REPORT("Failure to initialize device %d: %s\n", DeviceId, 1791330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1792330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1793330d8983SJohannes Doerfert } 1794330d8983SJohannes Doerfert 1795330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1796330d8983SJohannes Doerfert } 1797330d8983SJohannes Doerfert 1798330d8983SJohannes Doerfert int32_t GenericPluginTy::number_of_devices() { return getNumDevices(); } 1799330d8983SJohannes Doerfert 1800330d8983SJohannes Doerfert int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId, 1801330d8983SJohannes Doerfert int32_t DstDeviceId) { 1802330d8983SJohannes Doerfert return isDataExchangable(SrcDeviceId, DstDeviceId); 1803330d8983SJohannes Doerfert } 1804330d8983SJohannes Doerfert 1805330d8983SJohannes Doerfert int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId, 1806330d8983SJohannes Doerfert int64_t MemorySize, 1807330d8983SJohannes Doerfert void *VAddr, bool isRecord, 1808330d8983SJohannes Doerfert bool SaveOutput, 1809330d8983SJohannes Doerfert uint64_t &ReqPtrArgOffset) { 1810330d8983SJohannes Doerfert GenericDeviceTy &Device = getDevice(DeviceId); 1811330d8983SJohannes Doerfert RecordReplayTy::RRStatusTy Status = 1812330d8983SJohannes Doerfert isRecord ? RecordReplayTy::RRStatusTy::RRRecording 1813330d8983SJohannes Doerfert : RecordReplayTy::RRStatusTy::RRReplaying; 1814330d8983SJohannes Doerfert 1815f42f57b5SJoseph Huber if (auto Err = RecordReplay->init(&Device, MemorySize, VAddr, Status, 1816330d8983SJohannes Doerfert SaveOutput, ReqPtrArgOffset)) { 1817330d8983SJohannes Doerfert REPORT("WARNING RR did not intialize RR-properly with %lu bytes" 1818330d8983SJohannes Doerfert "(Error: %s)\n", 1819330d8983SJohannes Doerfert MemorySize, toString(std::move(Err)).data()); 1820f42f57b5SJoseph Huber RecordReplay->setStatus(RecordReplayTy::RRStatusTy::RRDeactivated); 1821330d8983SJohannes Doerfert 1822330d8983SJohannes Doerfert if (!isRecord) { 1823330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1824330d8983SJohannes Doerfert } 1825330d8983SJohannes Doerfert } 1826330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1827330d8983SJohannes Doerfert } 1828330d8983SJohannes Doerfert 1829330d8983SJohannes Doerfert int32_t GenericPluginTy::load_binary(int32_t DeviceId, 1830330d8983SJohannes Doerfert __tgt_device_image *TgtImage, 1831330d8983SJohannes Doerfert __tgt_device_binary *Binary) { 1832330d8983SJohannes Doerfert GenericDeviceTy &Device = getDevice(DeviceId); 1833330d8983SJohannes Doerfert 1834330d8983SJohannes Doerfert auto ImageOrErr = Device.loadBinary(*this, TgtImage); 1835330d8983SJohannes Doerfert if (!ImageOrErr) { 1836330d8983SJohannes Doerfert auto Err = ImageOrErr.takeError(); 1837330d8983SJohannes Doerfert REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage, 1838330d8983SJohannes Doerfert DeviceId, toString(std::move(Err)).data()); 1839330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1840330d8983SJohannes Doerfert } 1841330d8983SJohannes Doerfert 1842330d8983SJohannes Doerfert DeviceImageTy *Image = *ImageOrErr; 1843330d8983SJohannes Doerfert assert(Image != nullptr && "Invalid Image"); 1844330d8983SJohannes Doerfert 1845330d8983SJohannes Doerfert *Binary = __tgt_device_binary{reinterpret_cast<uint64_t>(Image)}; 1846330d8983SJohannes Doerfert 1847330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1848330d8983SJohannes Doerfert } 1849330d8983SJohannes Doerfert 1850330d8983SJohannes Doerfert void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, 1851330d8983SJohannes Doerfert int32_t Kind) { 1852330d8983SJohannes Doerfert auto AllocOrErr = 1853330d8983SJohannes Doerfert getDevice(DeviceId).dataAlloc(Size, HostPtr, (TargetAllocTy)Kind); 1854330d8983SJohannes Doerfert if (!AllocOrErr) { 1855330d8983SJohannes Doerfert auto Err = AllocOrErr.takeError(); 1856330d8983SJohannes Doerfert REPORT("Failure to allocate device memory: %s\n", 1857330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1858330d8983SJohannes Doerfert return nullptr; 1859330d8983SJohannes Doerfert } 1860330d8983SJohannes Doerfert assert(*AllocOrErr && "Null pointer upon successful allocation"); 1861330d8983SJohannes Doerfert 1862330d8983SJohannes Doerfert return *AllocOrErr; 1863330d8983SJohannes Doerfert } 1864330d8983SJohannes Doerfert 1865330d8983SJohannes Doerfert int32_t GenericPluginTy::data_delete(int32_t DeviceId, void *TgtPtr, 1866330d8983SJohannes Doerfert int32_t Kind) { 1867330d8983SJohannes Doerfert auto Err = 1868330d8983SJohannes Doerfert getDevice(DeviceId).dataDelete(TgtPtr, static_cast<TargetAllocTy>(Kind)); 1869330d8983SJohannes Doerfert if (Err) { 1870330d8983SJohannes Doerfert REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr, 1871330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1872330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1873330d8983SJohannes Doerfert } 1874330d8983SJohannes Doerfert 1875330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1876330d8983SJohannes Doerfert } 1877330d8983SJohannes Doerfert 1878330d8983SJohannes Doerfert int32_t GenericPluginTy::data_lock(int32_t DeviceId, void *Ptr, int64_t Size, 1879330d8983SJohannes Doerfert void **LockedPtr) { 1880330d8983SJohannes Doerfert auto LockedPtrOrErr = getDevice(DeviceId).dataLock(Ptr, Size); 1881330d8983SJohannes Doerfert if (!LockedPtrOrErr) { 1882330d8983SJohannes Doerfert auto Err = LockedPtrOrErr.takeError(); 1883330d8983SJohannes Doerfert REPORT("Failure to lock memory %p: %s\n", Ptr, 1884330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1885330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1886330d8983SJohannes Doerfert } 1887330d8983SJohannes Doerfert 1888330d8983SJohannes Doerfert if (!(*LockedPtrOrErr)) { 1889330d8983SJohannes Doerfert REPORT("Failure to lock memory %p: obtained a null locked pointer\n", Ptr); 1890330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1891330d8983SJohannes Doerfert } 1892330d8983SJohannes Doerfert *LockedPtr = *LockedPtrOrErr; 1893330d8983SJohannes Doerfert 1894330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1895330d8983SJohannes Doerfert } 1896330d8983SJohannes Doerfert 1897330d8983SJohannes Doerfert int32_t GenericPluginTy::data_unlock(int32_t DeviceId, void *Ptr) { 1898330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).dataUnlock(Ptr); 1899330d8983SJohannes Doerfert if (Err) { 1900330d8983SJohannes Doerfert REPORT("Failure to unlock memory %p: %s\n", Ptr, 1901330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1902330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1903330d8983SJohannes Doerfert } 1904330d8983SJohannes Doerfert 1905330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1906330d8983SJohannes Doerfert } 1907330d8983SJohannes Doerfert 1908330d8983SJohannes Doerfert int32_t GenericPluginTy::data_notify_mapped(int32_t DeviceId, void *HstPtr, 1909330d8983SJohannes Doerfert int64_t Size) { 1910330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).notifyDataMapped(HstPtr, Size); 1911330d8983SJohannes Doerfert if (Err) { 1912330d8983SJohannes Doerfert REPORT("Failure to notify data mapped %p: %s\n", HstPtr, 1913330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1914330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1915330d8983SJohannes Doerfert } 1916330d8983SJohannes Doerfert 1917330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1918330d8983SJohannes Doerfert } 1919330d8983SJohannes Doerfert 1920330d8983SJohannes Doerfert int32_t GenericPluginTy::data_notify_unmapped(int32_t DeviceId, void *HstPtr) { 1921330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).notifyDataUnmapped(HstPtr); 1922330d8983SJohannes Doerfert if (Err) { 1923330d8983SJohannes Doerfert REPORT("Failure to notify data unmapped %p: %s\n", HstPtr, 1924330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1925330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1926330d8983SJohannes Doerfert } 1927330d8983SJohannes Doerfert 1928330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1929330d8983SJohannes Doerfert } 1930330d8983SJohannes Doerfert 1931330d8983SJohannes Doerfert int32_t GenericPluginTy::data_submit(int32_t DeviceId, void *TgtPtr, 1932330d8983SJohannes Doerfert void *HstPtr, int64_t Size) { 1933330d8983SJohannes Doerfert return data_submit_async(DeviceId, TgtPtr, HstPtr, Size, 1934330d8983SJohannes Doerfert /*AsyncInfoPtr=*/nullptr); 1935330d8983SJohannes Doerfert } 1936330d8983SJohannes Doerfert 1937330d8983SJohannes Doerfert int32_t GenericPluginTy::data_submit_async(int32_t DeviceId, void *TgtPtr, 1938330d8983SJohannes Doerfert void *HstPtr, int64_t Size, 1939330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 1940330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size, AsyncInfoPtr); 1941330d8983SJohannes Doerfert if (Err) { 1942330d8983SJohannes Doerfert REPORT("Failure to copy data from host to device. Pointers: host " 1943330d8983SJohannes Doerfert "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", 1944330d8983SJohannes Doerfert DPxPTR(HstPtr), DPxPTR(TgtPtr), Size, 1945330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1946330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1947330d8983SJohannes Doerfert } 1948330d8983SJohannes Doerfert 1949330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1950330d8983SJohannes Doerfert } 1951330d8983SJohannes Doerfert 1952330d8983SJohannes Doerfert int32_t GenericPluginTy::data_retrieve(int32_t DeviceId, void *HstPtr, 1953330d8983SJohannes Doerfert void *TgtPtr, int64_t Size) { 1954330d8983SJohannes Doerfert return data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size, 1955330d8983SJohannes Doerfert /*AsyncInfoPtr=*/nullptr); 1956330d8983SJohannes Doerfert } 1957330d8983SJohannes Doerfert 1958330d8983SJohannes Doerfert int32_t GenericPluginTy::data_retrieve_async(int32_t DeviceId, void *HstPtr, 1959330d8983SJohannes Doerfert void *TgtPtr, int64_t Size, 1960330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 1961330d8983SJohannes Doerfert auto Err = 1962330d8983SJohannes Doerfert getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, Size, AsyncInfoPtr); 1963330d8983SJohannes Doerfert if (Err) { 1964330d8983SJohannes Doerfert REPORT("Faliure to copy data from device to host. Pointers: host " 1965330d8983SJohannes Doerfert "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", 1966330d8983SJohannes Doerfert DPxPTR(HstPtr), DPxPTR(TgtPtr), Size, 1967330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1968330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1969330d8983SJohannes Doerfert } 1970330d8983SJohannes Doerfert 1971330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1972330d8983SJohannes Doerfert } 1973330d8983SJohannes Doerfert 1974330d8983SJohannes Doerfert int32_t GenericPluginTy::data_exchange(int32_t SrcDeviceId, void *SrcPtr, 1975330d8983SJohannes Doerfert int32_t DstDeviceId, void *DstPtr, 1976330d8983SJohannes Doerfert int64_t Size) { 1977330d8983SJohannes Doerfert return data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, Size, 1978330d8983SJohannes Doerfert /*AsyncInfoPtr=*/nullptr); 1979330d8983SJohannes Doerfert } 1980330d8983SJohannes Doerfert 1981330d8983SJohannes Doerfert int32_t GenericPluginTy::data_exchange_async(int32_t SrcDeviceId, void *SrcPtr, 1982330d8983SJohannes Doerfert int DstDeviceId, void *DstPtr, 1983330d8983SJohannes Doerfert int64_t Size, 1984330d8983SJohannes Doerfert __tgt_async_info *AsyncInfo) { 1985330d8983SJohannes Doerfert GenericDeviceTy &SrcDevice = getDevice(SrcDeviceId); 1986330d8983SJohannes Doerfert GenericDeviceTy &DstDevice = getDevice(DstDeviceId); 1987330d8983SJohannes Doerfert auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo); 1988330d8983SJohannes Doerfert if (Err) { 1989330d8983SJohannes Doerfert REPORT("Failure to copy data from device (%d) to device (%d). Pointers: " 1990330d8983SJohannes Doerfert "host = " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", 1991330d8983SJohannes Doerfert SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size, 1992330d8983SJohannes Doerfert toString(std::move(Err)).data()); 1993330d8983SJohannes Doerfert return OFFLOAD_FAIL; 1994330d8983SJohannes Doerfert } 1995330d8983SJohannes Doerfert 1996330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 1997330d8983SJohannes Doerfert } 1998330d8983SJohannes Doerfert 1999330d8983SJohannes Doerfert int32_t GenericPluginTy::launch_kernel(int32_t DeviceId, void *TgtEntryPtr, 2000330d8983SJohannes Doerfert void **TgtArgs, ptrdiff_t *TgtOffsets, 2001330d8983SJohannes Doerfert KernelArgsTy *KernelArgs, 2002330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 2003330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).launchKernel(TgtEntryPtr, TgtArgs, TgtOffsets, 2004330d8983SJohannes Doerfert *KernelArgs, AsyncInfoPtr); 2005330d8983SJohannes Doerfert if (Err) { 2006330d8983SJohannes Doerfert REPORT("Failure to run target region " DPxMOD " in device %d: %s\n", 2007330d8983SJohannes Doerfert DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data()); 2008330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2009330d8983SJohannes Doerfert } 2010330d8983SJohannes Doerfert 2011330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2012330d8983SJohannes Doerfert } 2013330d8983SJohannes Doerfert 2014330d8983SJohannes Doerfert int32_t GenericPluginTy::synchronize(int32_t DeviceId, 2015330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 2016330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).synchronize(AsyncInfoPtr); 2017330d8983SJohannes Doerfert if (Err) { 2018330d8983SJohannes Doerfert REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue, 2019330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2020330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2021330d8983SJohannes Doerfert } 2022330d8983SJohannes Doerfert 2023330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2024330d8983SJohannes Doerfert } 2025330d8983SJohannes Doerfert 2026330d8983SJohannes Doerfert int32_t GenericPluginTy::query_async(int32_t DeviceId, 2027330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 2028330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).queryAsync(AsyncInfoPtr); 2029330d8983SJohannes Doerfert if (Err) { 2030330d8983SJohannes Doerfert REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue, 2031330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2032330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2033330d8983SJohannes Doerfert } 2034330d8983SJohannes Doerfert 2035330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2036330d8983SJohannes Doerfert } 2037330d8983SJohannes Doerfert 2038330d8983SJohannes Doerfert void GenericPluginTy::print_device_info(int32_t DeviceId) { 2039330d8983SJohannes Doerfert if (auto Err = getDevice(DeviceId).printInfo()) 2040330d8983SJohannes Doerfert REPORT("Failure to print device %d info: %s\n", DeviceId, 2041330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2042330d8983SJohannes Doerfert } 2043330d8983SJohannes Doerfert 2044330d8983SJohannes Doerfert int32_t GenericPluginTy::create_event(int32_t DeviceId, void **EventPtr) { 2045330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).createEvent(EventPtr); 2046330d8983SJohannes Doerfert if (Err) { 2047330d8983SJohannes Doerfert REPORT("Failure to create event: %s\n", toString(std::move(Err)).data()); 2048330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2049330d8983SJohannes Doerfert } 2050330d8983SJohannes Doerfert 2051330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2052330d8983SJohannes Doerfert } 2053330d8983SJohannes Doerfert 2054330d8983SJohannes Doerfert int32_t GenericPluginTy::record_event(int32_t DeviceId, void *EventPtr, 2055330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 2056330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr); 2057330d8983SJohannes Doerfert if (Err) { 2058330d8983SJohannes Doerfert REPORT("Failure to record event %p: %s\n", EventPtr, 2059330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2060330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2061330d8983SJohannes Doerfert } 2062330d8983SJohannes Doerfert 2063330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2064330d8983SJohannes Doerfert } 2065330d8983SJohannes Doerfert 2066330d8983SJohannes Doerfert int32_t GenericPluginTy::wait_event(int32_t DeviceId, void *EventPtr, 2067330d8983SJohannes Doerfert __tgt_async_info *AsyncInfoPtr) { 2068330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr); 2069330d8983SJohannes Doerfert if (Err) { 2070330d8983SJohannes Doerfert REPORT("Failure to wait event %p: %s\n", EventPtr, 2071330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2072330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2073330d8983SJohannes Doerfert } 2074330d8983SJohannes Doerfert 2075330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2076330d8983SJohannes Doerfert } 2077330d8983SJohannes Doerfert 2078330d8983SJohannes Doerfert int32_t GenericPluginTy::sync_event(int32_t DeviceId, void *EventPtr) { 2079330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).syncEvent(EventPtr); 2080330d8983SJohannes Doerfert if (Err) { 2081330d8983SJohannes Doerfert REPORT("Failure to synchronize event %p: %s\n", EventPtr, 2082330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2083330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2084330d8983SJohannes Doerfert } 2085330d8983SJohannes Doerfert 2086330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2087330d8983SJohannes Doerfert } 2088330d8983SJohannes Doerfert 2089330d8983SJohannes Doerfert int32_t GenericPluginTy::destroy_event(int32_t DeviceId, void *EventPtr) { 2090330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).destroyEvent(EventPtr); 2091330d8983SJohannes Doerfert if (Err) { 2092330d8983SJohannes Doerfert REPORT("Failure to destroy event %p: %s\n", EventPtr, 2093330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2094330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2095330d8983SJohannes Doerfert } 2096330d8983SJohannes Doerfert 2097330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2098330d8983SJohannes Doerfert } 2099330d8983SJohannes Doerfert 2100330d8983SJohannes Doerfert void GenericPluginTy::set_info_flag(uint32_t NewInfoLevel) { 2101330d8983SJohannes Doerfert std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); 2102330d8983SJohannes Doerfert InfoLevel.store(NewInfoLevel); 2103330d8983SJohannes Doerfert } 2104330d8983SJohannes Doerfert 2105330d8983SJohannes Doerfert int32_t GenericPluginTy::init_async_info(int32_t DeviceId, 2106330d8983SJohannes Doerfert __tgt_async_info **AsyncInfoPtr) { 2107330d8983SJohannes Doerfert assert(AsyncInfoPtr && "Invalid async info"); 2108330d8983SJohannes Doerfert 2109330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr); 2110330d8983SJohannes Doerfert if (Err) { 2111330d8983SJohannes Doerfert REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n", 2112330d8983SJohannes Doerfert DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data()); 2113330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2114330d8983SJohannes Doerfert } 2115330d8983SJohannes Doerfert 2116330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2117330d8983SJohannes Doerfert } 2118330d8983SJohannes Doerfert 2119330d8983SJohannes Doerfert int32_t GenericPluginTy::init_device_info(int32_t DeviceId, 2120330d8983SJohannes Doerfert __tgt_device_info *DeviceInfo, 2121330d8983SJohannes Doerfert const char **ErrStr) { 2122330d8983SJohannes Doerfert *ErrStr = ""; 2123330d8983SJohannes Doerfert 2124330d8983SJohannes Doerfert auto Err = getDevice(DeviceId).initDeviceInfo(DeviceInfo); 2125330d8983SJohannes Doerfert if (Err) { 2126330d8983SJohannes Doerfert REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n", 2127330d8983SJohannes Doerfert DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data()); 2128330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2129330d8983SJohannes Doerfert } 2130330d8983SJohannes Doerfert 2131330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2132330d8983SJohannes Doerfert } 2133330d8983SJohannes Doerfert 2134435aa766SJoseph Huber int32_t GenericPluginTy::set_device_identifier(int32_t UserId, 2135435aa766SJoseph Huber int32_t DeviceId) { 2136435aa766SJoseph Huber UserDeviceIds[DeviceId] = UserId; 2137330d8983SJohannes Doerfert 2138330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2139330d8983SJohannes Doerfert } 2140330d8983SJohannes Doerfert 2141330d8983SJohannes Doerfert int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) { 2142330d8983SJohannes Doerfert return getDevice(DeviceId).useAutoZeroCopy(); 2143330d8983SJohannes Doerfert } 2144330d8983SJohannes Doerfert 2145330d8983SJohannes Doerfert int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size, 2146330d8983SJohannes Doerfert const char *Name, void **DevicePtr) { 2147330d8983SJohannes Doerfert assert(Binary.handle && "Invalid device binary handle"); 2148330d8983SJohannes Doerfert DeviceImageTy &Image = *reinterpret_cast<DeviceImageTy *>(Binary.handle); 2149330d8983SJohannes Doerfert 2150330d8983SJohannes Doerfert GenericDeviceTy &Device = Image.getDevice(); 2151330d8983SJohannes Doerfert 2152330d8983SJohannes Doerfert GlobalTy DeviceGlobal(Name, Size); 2153330d8983SJohannes Doerfert GenericGlobalHandlerTy &GHandler = getGlobalHandler(); 2154330d8983SJohannes Doerfert if (auto Err = 2155330d8983SJohannes Doerfert GHandler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) { 2156330d8983SJohannes Doerfert REPORT("Failure to look up global address: %s\n", 2157330d8983SJohannes Doerfert toString(std::move(Err)).data()); 2158330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2159330d8983SJohannes Doerfert } 2160330d8983SJohannes Doerfert 2161330d8983SJohannes Doerfert *DevicePtr = DeviceGlobal.getPtr(); 2162330d8983SJohannes Doerfert assert(DevicePtr && "Invalid device global's address"); 2163330d8983SJohannes Doerfert 2164330d8983SJohannes Doerfert // Save the loaded globals if we are recording. 2165f42f57b5SJoseph Huber RecordReplayTy &RecordReplay = Device.Plugin.getRecordReplay(); 2166330d8983SJohannes Doerfert if (RecordReplay.isRecording()) 2167330d8983SJohannes Doerfert RecordReplay.addEntry(Name, Size, *DevicePtr); 2168330d8983SJohannes Doerfert 2169330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2170330d8983SJohannes Doerfert } 2171330d8983SJohannes Doerfert 2172330d8983SJohannes Doerfert int32_t GenericPluginTy::get_function(__tgt_device_binary Binary, 2173330d8983SJohannes Doerfert const char *Name, void **KernelPtr) { 2174330d8983SJohannes Doerfert assert(Binary.handle && "Invalid device binary handle"); 2175330d8983SJohannes Doerfert DeviceImageTy &Image = *reinterpret_cast<DeviceImageTy *>(Binary.handle); 2176330d8983SJohannes Doerfert 2177330d8983SJohannes Doerfert GenericDeviceTy &Device = Image.getDevice(); 2178330d8983SJohannes Doerfert 2179330d8983SJohannes Doerfert auto KernelOrErr = Device.constructKernel(Name); 2180330d8983SJohannes Doerfert if (Error Err = KernelOrErr.takeError()) { 2181330d8983SJohannes Doerfert REPORT("Failure to look up kernel: %s\n", toString(std::move(Err)).data()); 2182330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2183330d8983SJohannes Doerfert } 2184330d8983SJohannes Doerfert 2185330d8983SJohannes Doerfert GenericKernelTy &Kernel = *KernelOrErr; 2186330d8983SJohannes Doerfert if (auto Err = Kernel.init(Device, Image)) { 2187330d8983SJohannes Doerfert REPORT("Failure to init kernel: %s\n", toString(std::move(Err)).data()); 2188330d8983SJohannes Doerfert return OFFLOAD_FAIL; 2189330d8983SJohannes Doerfert } 2190330d8983SJohannes Doerfert 2191330d8983SJohannes Doerfert // Note that this is not the kernel's device address. 2192330d8983SJohannes Doerfert *KernelPtr = &Kernel; 2193330d8983SJohannes Doerfert return OFFLOAD_SUCCESS; 2194330d8983SJohannes Doerfert } 2195