xref: /llvm-project/offload/plugins-nextgen/common/src/PluginInterface.cpp (revision 38b3f45a811282511c014cffd09a8ae2a96435ba)
1330d8983SJohannes Doerfert //===- PluginInterface.cpp - Target independent plugin device interface ---===//
2330d8983SJohannes Doerfert //
3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6330d8983SJohannes Doerfert //
7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
8330d8983SJohannes Doerfert //
9330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
10330d8983SJohannes Doerfert 
11330d8983SJohannes Doerfert #include "PluginInterface.h"
12330d8983SJohannes Doerfert 
13330d8983SJohannes Doerfert #include "Shared/APITypes.h"
14330d8983SJohannes Doerfert #include "Shared/Debug.h"
15330d8983SJohannes Doerfert #include "Shared/Environment.h"
16330d8983SJohannes Doerfert 
17c95abe94SJohannes Doerfert #include "ErrorReporting.h"
18330d8983SJohannes Doerfert #include "GlobalHandler.h"
19330d8983SJohannes Doerfert #include "JIT.h"
2008533a3eSJohannes Doerfert #include "Shared/Utils.h"
21330d8983SJohannes Doerfert #include "Utils/ELF.h"
22330d8983SJohannes Doerfert #include "omptarget.h"
23330d8983SJohannes Doerfert 
24330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
25330d8983SJohannes Doerfert #include "OpenMP/OMPT/Callback.h"
26330d8983SJohannes Doerfert #include "omp-tools.h"
27330d8983SJohannes Doerfert #endif
28330d8983SJohannes Doerfert 
2921f3a609SJoseph Huber #include "llvm/Bitcode/BitcodeReader.h"
30330d8983SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
31330d8983SJohannes Doerfert #include "llvm/Support/Error.h"
32330d8983SJohannes Doerfert #include "llvm/Support/JSON.h"
33330d8983SJohannes Doerfert #include "llvm/Support/MathExtras.h"
34330d8983SJohannes Doerfert #include "llvm/Support/MemoryBuffer.h"
35c95abe94SJohannes Doerfert #include "llvm/Support/Signals.h"
36c95abe94SJohannes Doerfert #include "llvm/Support/raw_ostream.h"
37330d8983SJohannes Doerfert 
38330d8983SJohannes Doerfert #include <cstdint>
39330d8983SJohannes Doerfert #include <limits>
40330d8983SJohannes Doerfert 
41330d8983SJohannes Doerfert using namespace llvm;
42330d8983SJohannes Doerfert using namespace omp;
43330d8983SJohannes Doerfert using namespace target;
44330d8983SJohannes Doerfert using namespace plugin;
45330d8983SJohannes Doerfert 
46330d8983SJohannes Doerfert // TODO: Fix any thread safety issues for multi-threaded kernel recording.
47f42f57b5SJoseph Huber namespace llvm::omp::target::plugin {
48330d8983SJohannes Doerfert struct RecordReplayTy {
49330d8983SJohannes Doerfert 
50330d8983SJohannes Doerfert   // Describes the state of the record replay mechanism.
51330d8983SJohannes Doerfert   enum RRStatusTy { RRDeactivated = 0, RRRecording, RRReplaying };
52330d8983SJohannes Doerfert 
53330d8983SJohannes Doerfert private:
54330d8983SJohannes Doerfert   // Memory pointers for recording, replaying memory.
55330d8983SJohannes Doerfert   void *MemoryStart = nullptr;
56330d8983SJohannes Doerfert   void *MemoryPtr = nullptr;
57330d8983SJohannes Doerfert   size_t MemorySize = 0;
58330d8983SJohannes Doerfert   size_t TotalSize = 0;
59330d8983SJohannes Doerfert   GenericDeviceTy *Device = nullptr;
60330d8983SJohannes Doerfert   std::mutex AllocationLock;
61330d8983SJohannes Doerfert 
62330d8983SJohannes Doerfert   RRStatusTy Status = RRDeactivated;
63330d8983SJohannes Doerfert   bool ReplaySaveOutput = false;
64330d8983SJohannes Doerfert   bool UsedVAMap = false;
65330d8983SJohannes Doerfert   uintptr_t MemoryOffset = 0;
66330d8983SJohannes Doerfert 
67330d8983SJohannes Doerfert   // A list of all globals mapped to the device.
68330d8983SJohannes Doerfert   struct GlobalEntry {
69330d8983SJohannes Doerfert     const char *Name;
70330d8983SJohannes Doerfert     uint64_t Size;
71330d8983SJohannes Doerfert     void *Addr;
72330d8983SJohannes Doerfert   };
73330d8983SJohannes Doerfert   llvm::SmallVector<GlobalEntry> GlobalEntries{};
74330d8983SJohannes Doerfert 
75330d8983SJohannes Doerfert   void *suggestAddress(uint64_t MaxMemoryAllocation) {
76330d8983SJohannes Doerfert     // Get a valid pointer address for this system
77330d8983SJohannes Doerfert     void *Addr =
78330d8983SJohannes Doerfert         Device->allocate(1024, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
79330d8983SJohannes Doerfert     Device->free(Addr);
80330d8983SJohannes Doerfert     // Align Address to MaxMemoryAllocation
8108533a3eSJohannes Doerfert     Addr = (void *)utils::alignPtr((Addr), MaxMemoryAllocation);
82330d8983SJohannes Doerfert     return Addr;
83330d8983SJohannes Doerfert   }
84330d8983SJohannes Doerfert 
85330d8983SJohannes Doerfert   Error preAllocateVAMemory(uint64_t MaxMemoryAllocation, void *VAddr) {
86330d8983SJohannes Doerfert     size_t ASize = MaxMemoryAllocation;
87330d8983SJohannes Doerfert 
88330d8983SJohannes Doerfert     if (!VAddr && isRecording())
89330d8983SJohannes Doerfert       VAddr = suggestAddress(MaxMemoryAllocation);
90330d8983SJohannes Doerfert 
91330d8983SJohannes Doerfert     DP("Request %ld bytes allocated at %p\n", MaxMemoryAllocation, VAddr);
92330d8983SJohannes Doerfert 
93330d8983SJohannes Doerfert     if (auto Err = Device->memoryVAMap(&MemoryStart, VAddr, &ASize))
94330d8983SJohannes Doerfert       return Err;
95330d8983SJohannes Doerfert 
96330d8983SJohannes Doerfert     if (isReplaying() && VAddr != MemoryStart) {
97330d8983SJohannes Doerfert       return Plugin::error("Record-Replay cannot assign the"
98330d8983SJohannes Doerfert                            "requested recorded address (%p, %p)",
99330d8983SJohannes Doerfert                            VAddr, MemoryStart);
100330d8983SJohannes Doerfert     }
101330d8983SJohannes Doerfert 
102330d8983SJohannes Doerfert     INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
103330d8983SJohannes Doerfert          "Allocated %" PRIu64 " bytes at %p for replay.\n", ASize, MemoryStart);
104330d8983SJohannes Doerfert 
105330d8983SJohannes Doerfert     MemoryPtr = MemoryStart;
106330d8983SJohannes Doerfert     MemorySize = 0;
107330d8983SJohannes Doerfert     TotalSize = ASize;
108330d8983SJohannes Doerfert     UsedVAMap = true;
109330d8983SJohannes Doerfert     return Plugin::success();
110330d8983SJohannes Doerfert   }
111330d8983SJohannes Doerfert 
112330d8983SJohannes Doerfert   Error preAllocateHeuristic(uint64_t MaxMemoryAllocation,
113330d8983SJohannes Doerfert                              uint64_t RequiredMemoryAllocation, void *VAddr) {
114330d8983SJohannes Doerfert     const size_t MAX_MEMORY_ALLOCATION = MaxMemoryAllocation;
115330d8983SJohannes Doerfert     constexpr size_t STEP = 1024 * 1024 * 1024ULL;
116330d8983SJohannes Doerfert     MemoryStart = nullptr;
117330d8983SJohannes Doerfert     for (TotalSize = MAX_MEMORY_ALLOCATION; TotalSize > 0; TotalSize -= STEP) {
118330d8983SJohannes Doerfert       MemoryStart =
119330d8983SJohannes Doerfert           Device->allocate(TotalSize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
120330d8983SJohannes Doerfert       if (MemoryStart)
121330d8983SJohannes Doerfert         break;
122330d8983SJohannes Doerfert     }
123330d8983SJohannes Doerfert     if (!MemoryStart)
124330d8983SJohannes Doerfert       return Plugin::error("Allocating record/replay memory");
125330d8983SJohannes Doerfert 
126330d8983SJohannes Doerfert     if (VAddr && VAddr != MemoryStart)
127330d8983SJohannes Doerfert       MemoryOffset = uintptr_t(VAddr) - uintptr_t(MemoryStart);
128330d8983SJohannes Doerfert 
129330d8983SJohannes Doerfert     MemoryPtr = MemoryStart;
130330d8983SJohannes Doerfert     MemorySize = 0;
131330d8983SJohannes Doerfert 
132330d8983SJohannes Doerfert     // Check if we need adjustment.
133330d8983SJohannes Doerfert     if (MemoryOffset > 0 &&
134330d8983SJohannes Doerfert         TotalSize >= RequiredMemoryAllocation + MemoryOffset) {
135330d8983SJohannes Doerfert       // If we are off but "before" the required address and with enough space,
136330d8983SJohannes Doerfert       // we just "allocate" the offset to match the required address.
137330d8983SJohannes Doerfert       MemoryPtr = (char *)MemoryPtr + MemoryOffset;
138330d8983SJohannes Doerfert       MemorySize += MemoryOffset;
139330d8983SJohannes Doerfert       MemoryOffset = 0;
140330d8983SJohannes Doerfert       assert(MemoryPtr == VAddr && "Expected offset adjustment to work");
141330d8983SJohannes Doerfert     } else if (MemoryOffset) {
142330d8983SJohannes Doerfert       // If we are off and in a situation we cannot just "waste" memory to force
143330d8983SJohannes Doerfert       // a match, we hope adjusting the arguments is sufficient.
144330d8983SJohannes Doerfert       REPORT(
145330d8983SJohannes Doerfert           "WARNING Failed to allocate replay memory at required location %p, "
146330d8983SJohannes Doerfert           "got %p, trying to offset argument pointers by %" PRIi64 "\n",
147330d8983SJohannes Doerfert           VAddr, MemoryStart, MemoryOffset);
148330d8983SJohannes Doerfert     }
149330d8983SJohannes Doerfert 
150330d8983SJohannes Doerfert     INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
151330d8983SJohannes Doerfert          "Allocated %" PRIu64 " bytes at %p for replay.\n", TotalSize,
152330d8983SJohannes Doerfert          MemoryStart);
153330d8983SJohannes Doerfert 
154330d8983SJohannes Doerfert     return Plugin::success();
155330d8983SJohannes Doerfert   }
156330d8983SJohannes Doerfert 
157330d8983SJohannes Doerfert   Error preallocateDeviceMemory(uint64_t DeviceMemorySize, void *ReqVAddr) {
158330d8983SJohannes Doerfert     if (Device->supportVAManagement()) {
159330d8983SJohannes Doerfert       auto Err = preAllocateVAMemory(DeviceMemorySize, ReqVAddr);
160330d8983SJohannes Doerfert       if (Err) {
161330d8983SJohannes Doerfert         REPORT("WARNING VA mapping failed, fallback to heuristic: "
162330d8983SJohannes Doerfert                "(Error: %s)\n",
163330d8983SJohannes Doerfert                toString(std::move(Err)).data());
164330d8983SJohannes Doerfert       }
165330d8983SJohannes Doerfert     }
166330d8983SJohannes Doerfert 
167330d8983SJohannes Doerfert     uint64_t DevMemSize;
168330d8983SJohannes Doerfert     if (Device->getDeviceMemorySize(DevMemSize))
169330d8983SJohannes Doerfert       return Plugin::error("Cannot determine Device Memory Size");
170330d8983SJohannes Doerfert 
171330d8983SJohannes Doerfert     return preAllocateHeuristic(DevMemSize, DeviceMemorySize, ReqVAddr);
172330d8983SJohannes Doerfert   }
173330d8983SJohannes Doerfert 
174330d8983SJohannes Doerfert   void dumpDeviceMemory(StringRef Filename) {
175330d8983SJohannes Doerfert     ErrorOr<std::unique_ptr<WritableMemoryBuffer>> DeviceMemoryMB =
176330d8983SJohannes Doerfert         WritableMemoryBuffer::getNewUninitMemBuffer(MemorySize);
177330d8983SJohannes Doerfert     if (!DeviceMemoryMB)
178330d8983SJohannes Doerfert       report_fatal_error("Error creating MemoryBuffer for device memory");
179330d8983SJohannes Doerfert 
180330d8983SJohannes Doerfert     auto Err = Device->dataRetrieve(DeviceMemoryMB.get()->getBufferStart(),
181330d8983SJohannes Doerfert                                     MemoryStart, MemorySize, nullptr);
182330d8983SJohannes Doerfert     if (Err)
183330d8983SJohannes Doerfert       report_fatal_error("Error retrieving data for target pointer");
184330d8983SJohannes Doerfert 
185330d8983SJohannes Doerfert     StringRef DeviceMemory(DeviceMemoryMB.get()->getBufferStart(), MemorySize);
186330d8983SJohannes Doerfert     std::error_code EC;
187330d8983SJohannes Doerfert     raw_fd_ostream OS(Filename, EC);
188330d8983SJohannes Doerfert     if (EC)
189330d8983SJohannes Doerfert       report_fatal_error("Error dumping memory to file " + Filename + " :" +
190330d8983SJohannes Doerfert                          EC.message());
191330d8983SJohannes Doerfert     OS << DeviceMemory;
192330d8983SJohannes Doerfert     OS.close();
193330d8983SJohannes Doerfert   }
194330d8983SJohannes Doerfert 
195330d8983SJohannes Doerfert public:
196330d8983SJohannes Doerfert   bool isRecording() const { return Status == RRStatusTy::RRRecording; }
197330d8983SJohannes Doerfert   bool isReplaying() const { return Status == RRStatusTy::RRReplaying; }
198330d8983SJohannes Doerfert   bool isRecordingOrReplaying() const {
199330d8983SJohannes Doerfert     return (Status != RRStatusTy::RRDeactivated);
200330d8983SJohannes Doerfert   }
201330d8983SJohannes Doerfert   void setStatus(RRStatusTy Status) { this->Status = Status; }
202330d8983SJohannes Doerfert   bool isSaveOutputEnabled() const { return ReplaySaveOutput; }
203330d8983SJohannes Doerfert   void addEntry(const char *Name, uint64_t Size, void *Addr) {
204330d8983SJohannes Doerfert     GlobalEntries.emplace_back(GlobalEntry{Name, Size, Addr});
205330d8983SJohannes Doerfert   }
206330d8983SJohannes Doerfert 
207330d8983SJohannes Doerfert   void saveImage(const char *Name, const DeviceImageTy &Image) {
208330d8983SJohannes Doerfert     SmallString<128> ImageName = {Name, ".image"};
209330d8983SJohannes Doerfert     std::error_code EC;
210330d8983SJohannes Doerfert     raw_fd_ostream OS(ImageName, EC);
211330d8983SJohannes Doerfert     if (EC)
212330d8983SJohannes Doerfert       report_fatal_error("Error saving image : " + StringRef(EC.message()));
213330d8983SJohannes Doerfert     if (const auto *TgtImageBitcode = Image.getTgtImageBitcode()) {
21408533a3eSJohannes Doerfert       size_t Size = utils::getPtrDiff(TgtImageBitcode->ImageEnd,
21508533a3eSJohannes Doerfert                                       TgtImageBitcode->ImageStart);
216330d8983SJohannes Doerfert       MemoryBufferRef MBR = MemoryBufferRef(
217330d8983SJohannes Doerfert           StringRef((const char *)TgtImageBitcode->ImageStart, Size), "");
218330d8983SJohannes Doerfert       OS << MBR.getBuffer();
219330d8983SJohannes Doerfert     } else {
220330d8983SJohannes Doerfert       OS << Image.getMemoryBuffer().getBuffer();
221330d8983SJohannes Doerfert     }
222330d8983SJohannes Doerfert     OS.close();
223330d8983SJohannes Doerfert   }
224330d8983SJohannes Doerfert 
225330d8983SJohannes Doerfert   void dumpGlobals(StringRef Filename, DeviceImageTy &Image) {
226330d8983SJohannes Doerfert     int32_t Size = 0;
227330d8983SJohannes Doerfert 
228330d8983SJohannes Doerfert     for (auto &OffloadEntry : GlobalEntries) {
229330d8983SJohannes Doerfert       if (!OffloadEntry.Size)
230330d8983SJohannes Doerfert         continue;
231330d8983SJohannes Doerfert       // Get the total size of the string and entry including the null byte.
232330d8983SJohannes Doerfert       Size += std::strlen(OffloadEntry.Name) + 1 + sizeof(uint32_t) +
233330d8983SJohannes Doerfert               OffloadEntry.Size;
234330d8983SJohannes Doerfert     }
235330d8983SJohannes Doerfert 
236330d8983SJohannes Doerfert     ErrorOr<std::unique_ptr<WritableMemoryBuffer>> GlobalsMB =
237330d8983SJohannes Doerfert         WritableMemoryBuffer::getNewUninitMemBuffer(Size);
238330d8983SJohannes Doerfert     if (!GlobalsMB)
239330d8983SJohannes Doerfert       report_fatal_error("Error creating MemoryBuffer for globals memory");
240330d8983SJohannes Doerfert 
241330d8983SJohannes Doerfert     void *BufferPtr = GlobalsMB.get()->getBufferStart();
242330d8983SJohannes Doerfert     for (auto &OffloadEntry : GlobalEntries) {
243330d8983SJohannes Doerfert       if (!OffloadEntry.Size)
244330d8983SJohannes Doerfert         continue;
245330d8983SJohannes Doerfert 
246330d8983SJohannes Doerfert       int32_t NameLength = std::strlen(OffloadEntry.Name) + 1;
247330d8983SJohannes Doerfert       memcpy(BufferPtr, OffloadEntry.Name, NameLength);
24808533a3eSJohannes Doerfert       BufferPtr = utils::advancePtr(BufferPtr, NameLength);
249330d8983SJohannes Doerfert 
250330d8983SJohannes Doerfert       *((uint32_t *)(BufferPtr)) = OffloadEntry.Size;
25108533a3eSJohannes Doerfert       BufferPtr = utils::advancePtr(BufferPtr, sizeof(uint32_t));
252330d8983SJohannes Doerfert 
253330d8983SJohannes Doerfert       auto Err = Plugin::success();
254330d8983SJohannes Doerfert       {
255330d8983SJohannes Doerfert         if (auto Err = Device->dataRetrieve(BufferPtr, OffloadEntry.Addr,
256330d8983SJohannes Doerfert                                             OffloadEntry.Size, nullptr))
257330d8983SJohannes Doerfert           report_fatal_error("Error retrieving data for global");
258330d8983SJohannes Doerfert       }
259330d8983SJohannes Doerfert       if (Err)
260330d8983SJohannes Doerfert         report_fatal_error("Error retrieving data for global");
26108533a3eSJohannes Doerfert       BufferPtr = utils::advancePtr(BufferPtr, OffloadEntry.Size);
262330d8983SJohannes Doerfert     }
263330d8983SJohannes Doerfert     assert(BufferPtr == GlobalsMB->get()->getBufferEnd() &&
264330d8983SJohannes Doerfert            "Buffer over/under-filled.");
26508533a3eSJohannes Doerfert     assert(Size == utils::getPtrDiff(BufferPtr,
26608533a3eSJohannes Doerfert                                      GlobalsMB->get()->getBufferStart()) &&
267330d8983SJohannes Doerfert            "Buffer size mismatch");
268330d8983SJohannes Doerfert 
269330d8983SJohannes Doerfert     StringRef GlobalsMemory(GlobalsMB.get()->getBufferStart(), Size);
270330d8983SJohannes Doerfert     std::error_code EC;
271330d8983SJohannes Doerfert     raw_fd_ostream OS(Filename, EC);
272330d8983SJohannes Doerfert     OS << GlobalsMemory;
273330d8983SJohannes Doerfert     OS.close();
274330d8983SJohannes Doerfert   }
275330d8983SJohannes Doerfert 
27654b5c76dSJohannes Doerfert   void saveKernelDescr(const char *Name, KernelLaunchParamsTy LaunchParams,
27754b5c76dSJohannes Doerfert                        int32_t NumArgs, uint64_t NumTeamsClause,
27854b5c76dSJohannes Doerfert                        uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
279330d8983SJohannes Doerfert     json::Object JsonKernelInfo;
280330d8983SJohannes Doerfert     JsonKernelInfo["Name"] = Name;
281330d8983SJohannes Doerfert     JsonKernelInfo["NumArgs"] = NumArgs;
282330d8983SJohannes Doerfert     JsonKernelInfo["NumTeamsClause"] = NumTeamsClause;
283330d8983SJohannes Doerfert     JsonKernelInfo["ThreadLimitClause"] = ThreadLimitClause;
284330d8983SJohannes Doerfert     JsonKernelInfo["LoopTripCount"] = LoopTripCount;
285330d8983SJohannes Doerfert     JsonKernelInfo["DeviceMemorySize"] = MemorySize;
286330d8983SJohannes Doerfert     JsonKernelInfo["DeviceId"] = Device->getDeviceId();
287330d8983SJohannes Doerfert     JsonKernelInfo["BumpAllocVAStart"] = (intptr_t)MemoryStart;
288330d8983SJohannes Doerfert 
289330d8983SJohannes Doerfert     json::Array JsonArgPtrs;
290330d8983SJohannes Doerfert     for (int I = 0; I < NumArgs; ++I)
29154b5c76dSJohannes Doerfert       JsonArgPtrs.push_back((intptr_t)LaunchParams.Ptrs[I]);
292330d8983SJohannes Doerfert     JsonKernelInfo["ArgPtrs"] = json::Value(std::move(JsonArgPtrs));
293330d8983SJohannes Doerfert 
294330d8983SJohannes Doerfert     json::Array JsonArgOffsets;
295330d8983SJohannes Doerfert     for (int I = 0; I < NumArgs; ++I)
296330d8983SJohannes Doerfert       JsonArgOffsets.push_back(0);
297330d8983SJohannes Doerfert     JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets));
298330d8983SJohannes Doerfert 
299330d8983SJohannes Doerfert     SmallString<128> JsonFilename = {Name, ".json"};
300330d8983SJohannes Doerfert     std::error_code EC;
301330d8983SJohannes Doerfert     raw_fd_ostream JsonOS(JsonFilename.str(), EC);
302330d8983SJohannes Doerfert     if (EC)
303330d8983SJohannes Doerfert       report_fatal_error("Error saving kernel json file : " +
304330d8983SJohannes Doerfert                          StringRef(EC.message()));
305330d8983SJohannes Doerfert     JsonOS << json::Value(std::move(JsonKernelInfo));
306330d8983SJohannes Doerfert     JsonOS.close();
307330d8983SJohannes Doerfert   }
308330d8983SJohannes Doerfert 
309330d8983SJohannes Doerfert   void saveKernelInput(const char *Name, DeviceImageTy &Image) {
310330d8983SJohannes Doerfert     SmallString<128> GlobalsFilename = {Name, ".globals"};
311330d8983SJohannes Doerfert     dumpGlobals(GlobalsFilename, Image);
312330d8983SJohannes Doerfert 
313330d8983SJohannes Doerfert     SmallString<128> MemoryFilename = {Name, ".memory"};
314330d8983SJohannes Doerfert     dumpDeviceMemory(MemoryFilename);
315330d8983SJohannes Doerfert   }
316330d8983SJohannes Doerfert 
317330d8983SJohannes Doerfert   void saveKernelOutputInfo(const char *Name) {
318330d8983SJohannes Doerfert     SmallString<128> OutputFilename = {
319330d8983SJohannes Doerfert         Name, (isRecording() ? ".original.output" : ".replay.output")};
320330d8983SJohannes Doerfert     dumpDeviceMemory(OutputFilename);
321330d8983SJohannes Doerfert   }
322330d8983SJohannes Doerfert 
323330d8983SJohannes Doerfert   void *alloc(uint64_t Size) {
324330d8983SJohannes Doerfert     assert(MemoryStart && "Expected memory has been pre-allocated");
325330d8983SJohannes Doerfert     void *Alloc = nullptr;
326330d8983SJohannes Doerfert     constexpr int Alignment = 16;
327330d8983SJohannes Doerfert     // Assumes alignment is a power of 2.
328330d8983SJohannes Doerfert     int64_t AlignedSize = (Size + (Alignment - 1)) & (~(Alignment - 1));
329330d8983SJohannes Doerfert     std::lock_guard<std::mutex> LG(AllocationLock);
330330d8983SJohannes Doerfert     Alloc = MemoryPtr;
331330d8983SJohannes Doerfert     MemoryPtr = (char *)MemoryPtr + AlignedSize;
332330d8983SJohannes Doerfert     MemorySize += AlignedSize;
333330d8983SJohannes Doerfert     DP("Memory Allocator return " DPxMOD "\n", DPxPTR(Alloc));
334330d8983SJohannes Doerfert     return Alloc;
335330d8983SJohannes Doerfert   }
336330d8983SJohannes Doerfert 
337330d8983SJohannes Doerfert   Error init(GenericDeviceTy *Device, uint64_t MemSize, void *VAddr,
338330d8983SJohannes Doerfert              RRStatusTy Status, bool SaveOutput, uint64_t &ReqPtrArgOffset) {
339330d8983SJohannes Doerfert     this->Device = Device;
340330d8983SJohannes Doerfert     this->Status = Status;
341330d8983SJohannes Doerfert     this->ReplaySaveOutput = SaveOutput;
342330d8983SJohannes Doerfert 
343330d8983SJohannes Doerfert     if (auto Err = preallocateDeviceMemory(MemSize, VAddr))
344330d8983SJohannes Doerfert       return Err;
345330d8983SJohannes Doerfert 
346330d8983SJohannes Doerfert     INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
347330d8983SJohannes Doerfert          "Record Replay Initialized (%p)"
348330d8983SJohannes Doerfert          " as starting address, %lu Memory Size"
349330d8983SJohannes Doerfert          " and set on status %s\n",
350330d8983SJohannes Doerfert          MemoryStart, TotalSize,
351330d8983SJohannes Doerfert          Status == RRStatusTy::RRRecording ? "Recording" : "Replaying");
352330d8983SJohannes Doerfert 
353330d8983SJohannes Doerfert     // Tell the user to offset pointer arguments as the memory allocation does
354330d8983SJohannes Doerfert     // not match.
355330d8983SJohannes Doerfert     ReqPtrArgOffset = MemoryOffset;
356330d8983SJohannes Doerfert     return Plugin::success();
357330d8983SJohannes Doerfert   }
358330d8983SJohannes Doerfert 
359330d8983SJohannes Doerfert   void deinit() {
360330d8983SJohannes Doerfert     if (UsedVAMap) {
361330d8983SJohannes Doerfert       if (auto Err = Device->memoryVAUnMap(MemoryStart, TotalSize))
362330d8983SJohannes Doerfert         report_fatal_error("Error on releasing virtual memory space");
363330d8983SJohannes Doerfert     } else {
364330d8983SJohannes Doerfert       Device->free(MemoryStart);
365330d8983SJohannes Doerfert     }
366330d8983SJohannes Doerfert   }
367330d8983SJohannes Doerfert };
368f42f57b5SJoseph Huber } // namespace llvm::omp::target::plugin
369330d8983SJohannes Doerfert 
370330d8983SJohannes Doerfert // Extract the mapping of host function pointers to device function pointers
371330d8983SJohannes Doerfert // from the entry table. Functions marked as 'indirect' in OpenMP will have
372330d8983SJohannes Doerfert // offloading entries generated for them which map the host's function pointer
373330d8983SJohannes Doerfert // to a global containing the corresponding function pointer on the device.
374330d8983SJohannes Doerfert static Expected<std::pair<void *, uint64_t>>
375330d8983SJohannes Doerfert setupIndirectCallTable(GenericPluginTy &Plugin, GenericDeviceTy &Device,
376330d8983SJohannes Doerfert                        DeviceImageTy &Image) {
377330d8983SJohannes Doerfert   GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
378330d8983SJohannes Doerfert 
3796518b121SJoseph Huber   llvm::ArrayRef<llvm::offloading::EntryTy> Entries(
3806518b121SJoseph Huber       Image.getTgtImage()->EntriesBegin, Image.getTgtImage()->EntriesEnd);
381330d8983SJohannes Doerfert   llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
382330d8983SJohannes Doerfert   for (const auto &Entry : Entries) {
3836518b121SJoseph Huber     if (Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT))
384330d8983SJohannes Doerfert       continue;
385330d8983SJohannes Doerfert 
3866518b121SJoseph Huber     assert(Entry.Size == sizeof(void *) && "Global not a function pointer?");
387330d8983SJohannes Doerfert     auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
388330d8983SJohannes Doerfert 
3896518b121SJoseph Huber     GlobalTy DeviceGlobal(Entry.SymbolName, Entry.Size);
390330d8983SJohannes Doerfert     if (auto Err =
391330d8983SJohannes Doerfert             Handler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal))
392330d8983SJohannes Doerfert       return std::move(Err);
393330d8983SJohannes Doerfert 
3946518b121SJoseph Huber     HstPtr = Entry.Address;
395330d8983SJohannes Doerfert     if (auto Err = Device.dataRetrieve(&DevPtr, DeviceGlobal.getPtr(),
3966518b121SJoseph Huber                                        Entry.Size, nullptr))
397330d8983SJohannes Doerfert       return std::move(Err);
398330d8983SJohannes Doerfert   }
399330d8983SJohannes Doerfert 
400330d8983SJohannes Doerfert   // If we do not have any indirect globals we exit early.
401330d8983SJohannes Doerfert   if (IndirectCallTable.empty())
402330d8983SJohannes Doerfert     return std::pair{nullptr, 0};
403330d8983SJohannes Doerfert 
404330d8983SJohannes Doerfert   // Sort the array to allow for more efficient lookup of device pointers.
405330d8983SJohannes Doerfert   llvm::sort(IndirectCallTable,
406330d8983SJohannes Doerfert              [](const auto &x, const auto &y) { return x.first < y.first; });
407330d8983SJohannes Doerfert 
408330d8983SJohannes Doerfert   uint64_t TableSize =
409330d8983SJohannes Doerfert       IndirectCallTable.size() * sizeof(std::pair<void *, void *>);
410330d8983SJohannes Doerfert   void *DevicePtr = Device.allocate(TableSize, nullptr, TARGET_ALLOC_DEVICE);
411330d8983SJohannes Doerfert   if (auto Err = Device.dataSubmit(DevicePtr, IndirectCallTable.data(),
412330d8983SJohannes Doerfert                                    TableSize, nullptr))
413330d8983SJohannes Doerfert     return std::move(Err);
414330d8983SJohannes Doerfert   return std::pair<void *, uint64_t>(DevicePtr, IndirectCallTable.size());
415330d8983SJohannes Doerfert }
416330d8983SJohannes Doerfert 
417330d8983SJohannes Doerfert AsyncInfoWrapperTy::AsyncInfoWrapperTy(GenericDeviceTy &Device,
418330d8983SJohannes Doerfert                                        __tgt_async_info *AsyncInfoPtr)
419330d8983SJohannes Doerfert     : Device(Device),
420330d8983SJohannes Doerfert       AsyncInfoPtr(AsyncInfoPtr ? AsyncInfoPtr : &LocalAsyncInfo) {}
421330d8983SJohannes Doerfert 
422330d8983SJohannes Doerfert void AsyncInfoWrapperTy::finalize(Error &Err) {
423330d8983SJohannes Doerfert   assert(AsyncInfoPtr && "AsyncInfoWrapperTy already finalized");
424330d8983SJohannes Doerfert 
425330d8983SJohannes Doerfert   // If we used a local async info object we want synchronous behavior. In that
426330d8983SJohannes Doerfert   // case, and assuming the current status code is correct, we will synchronize
427330d8983SJohannes Doerfert   // explicitly when the object is deleted. Update the error with the result of
428330d8983SJohannes Doerfert   // the synchronize operation.
429330d8983SJohannes Doerfert   if (AsyncInfoPtr == &LocalAsyncInfo && LocalAsyncInfo.Queue && !Err)
430330d8983SJohannes Doerfert     Err = Device.synchronize(&LocalAsyncInfo);
431330d8983SJohannes Doerfert 
432330d8983SJohannes Doerfert   // Invalidate the wrapper object.
433330d8983SJohannes Doerfert   AsyncInfoPtr = nullptr;
434330d8983SJohannes Doerfert }
435330d8983SJohannes Doerfert 
436330d8983SJohannes Doerfert Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
437330d8983SJohannes Doerfert                             DeviceImageTy &Image) {
438330d8983SJohannes Doerfert 
439330d8983SJohannes Doerfert   ImagePtr = &Image;
440330d8983SJohannes Doerfert 
441330d8983SJohannes Doerfert   // Retrieve kernel environment object for the kernel.
442330d8983SJohannes Doerfert   GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
443330d8983SJohannes Doerfert                      sizeof(KernelEnvironment), &KernelEnvironment);
444330d8983SJohannes Doerfert   GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
445330d8983SJohannes Doerfert   if (auto Err =
446330d8983SJohannes Doerfert           GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
447330d8983SJohannes Doerfert     [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
448330d8983SJohannes Doerfert     DP("Failed to read kernel environment for '%s': %s\n"
449330d8983SJohannes Doerfert        "Using default SPMD (2) execution mode\n",
450330d8983SJohannes Doerfert        Name, ErrStr.data());
451330d8983SJohannes Doerfert     assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
452330d8983SJohannes Doerfert            "Default initialization failed.");
453330d8983SJohannes Doerfert     IsBareKernel = true;
454330d8983SJohannes Doerfert   }
455330d8983SJohannes Doerfert 
456330d8983SJohannes Doerfert   // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
457330d8983SJohannes Doerfert   MaxNumThreads = KernelEnvironment.Configuration.MaxThreads > 0
458330d8983SJohannes Doerfert                       ? std::min(KernelEnvironment.Configuration.MaxThreads,
459330d8983SJohannes Doerfert                                  int32_t(GenericDevice.getThreadLimit()))
460330d8983SJohannes Doerfert                       : GenericDevice.getThreadLimit();
461330d8983SJohannes Doerfert 
462330d8983SJohannes Doerfert   // Pref = Config.Pref > 0 ? max(Config.Pref, Device.Pref) : Device.Pref;
463330d8983SJohannes Doerfert   PreferredNumThreads =
464330d8983SJohannes Doerfert       KernelEnvironment.Configuration.MinThreads > 0
465330d8983SJohannes Doerfert           ? std::max(KernelEnvironment.Configuration.MinThreads,
466330d8983SJohannes Doerfert                      int32_t(GenericDevice.getDefaultNumThreads()))
467330d8983SJohannes Doerfert           : GenericDevice.getDefaultNumThreads();
468330d8983SJohannes Doerfert 
469330d8983SJohannes Doerfert   return initImpl(GenericDevice, Image);
470330d8983SJohannes Doerfert }
471330d8983SJohannes Doerfert 
472330d8983SJohannes Doerfert Expected<KernelLaunchEnvironmentTy *>
473330d8983SJohannes Doerfert GenericKernelTy::getKernelLaunchEnvironment(
474330d8983SJohannes Doerfert     GenericDeviceTy &GenericDevice, uint32_t Version,
475330d8983SJohannes Doerfert     AsyncInfoWrapperTy &AsyncInfoWrapper) const {
476330d8983SJohannes Doerfert   // Ctor/Dtor have no arguments, replaying uses the original kernel launch
477330d8983SJohannes Doerfert   // environment. Older versions of the compiler do not generate a kernel
478330d8983SJohannes Doerfert   // launch environment.
479f42f57b5SJoseph Huber   if (GenericDevice.Plugin.getRecordReplay().isReplaying() ||
480330d8983SJohannes Doerfert       Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
481330d8983SJohannes Doerfert     return nullptr;
482330d8983SJohannes Doerfert 
483330d8983SJohannes Doerfert   if (!KernelEnvironment.Configuration.ReductionDataSize ||
484330d8983SJohannes Doerfert       !KernelEnvironment.Configuration.ReductionBufferLength)
485330d8983SJohannes Doerfert     return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0);
486330d8983SJohannes Doerfert 
487330d8983SJohannes Doerfert   // TODO: Check if the kernel needs a launch environment.
488330d8983SJohannes Doerfert   auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
489330d8983SJohannes Doerfert                                             /*HostPtr=*/nullptr,
490330d8983SJohannes Doerfert                                             TargetAllocTy::TARGET_ALLOC_DEVICE);
491330d8983SJohannes Doerfert   if (!AllocOrErr)
492330d8983SJohannes Doerfert     return AllocOrErr.takeError();
493330d8983SJohannes Doerfert 
494330d8983SJohannes Doerfert   // Remember to free the memory later.
495330d8983SJohannes Doerfert   AsyncInfoWrapper.freeAllocationAfterSynchronization(*AllocOrErr);
496330d8983SJohannes Doerfert 
497330d8983SJohannes Doerfert   /// Use the KLE in the __tgt_async_info to ensure a stable address for the
498330d8983SJohannes Doerfert   /// async data transfer.
499330d8983SJohannes Doerfert   auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment;
500330d8983SJohannes Doerfert   LocalKLE = KernelLaunchEnvironment;
501330d8983SJohannes Doerfert   {
502330d8983SJohannes Doerfert     auto AllocOrErr = GenericDevice.dataAlloc(
503330d8983SJohannes Doerfert         KernelEnvironment.Configuration.ReductionDataSize *
504330d8983SJohannes Doerfert             KernelEnvironment.Configuration.ReductionBufferLength,
505330d8983SJohannes Doerfert         /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE);
506330d8983SJohannes Doerfert     if (!AllocOrErr)
507330d8983SJohannes Doerfert       return AllocOrErr.takeError();
508330d8983SJohannes Doerfert     LocalKLE.ReductionBuffer = *AllocOrErr;
509330d8983SJohannes Doerfert     // Remember to free the memory later.
510330d8983SJohannes Doerfert     AsyncInfoWrapper.freeAllocationAfterSynchronization(*AllocOrErr);
511330d8983SJohannes Doerfert   }
512330d8983SJohannes Doerfert 
513330d8983SJohannes Doerfert   INFO(OMP_INFOTYPE_DATA_TRANSFER, GenericDevice.getDeviceId(),
514330d8983SJohannes Doerfert        "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
515330d8983SJohannes Doerfert        ", Size=%" PRId64 ", Name=KernelLaunchEnv\n",
516330d8983SJohannes Doerfert        DPxPTR(&LocalKLE), DPxPTR(*AllocOrErr),
517330d8983SJohannes Doerfert        sizeof(KernelLaunchEnvironmentTy));
518330d8983SJohannes Doerfert 
519330d8983SJohannes Doerfert   auto Err = GenericDevice.dataSubmit(*AllocOrErr, &LocalKLE,
520330d8983SJohannes Doerfert                                       sizeof(KernelLaunchEnvironmentTy),
521330d8983SJohannes Doerfert                                       AsyncInfoWrapper);
522330d8983SJohannes Doerfert   if (Err)
523330d8983SJohannes Doerfert     return Err;
524330d8983SJohannes Doerfert   return static_cast<KernelLaunchEnvironmentTy *>(*AllocOrErr);
525330d8983SJohannes Doerfert }
526330d8983SJohannes Doerfert 
527330d8983SJohannes Doerfert Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice,
528330d8983SJohannes Doerfert                                        KernelArgsTy &KernelArgs,
52992376c3fSShilei Tian                                        uint32_t NumThreads[3],
53092376c3fSShilei Tian                                        uint32_t NumBlocks[3]) const {
531330d8983SJohannes Doerfert   INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
53292376c3fSShilei Tian        "Launching kernel %s with [%u,%u,%u] blocks and [%u,%u,%u] threads in "
53392376c3fSShilei Tian        "%s mode\n",
53492376c3fSShilei Tian        getName(), NumBlocks[0], NumBlocks[1], NumBlocks[2], NumThreads[0],
53592376c3fSShilei Tian        NumThreads[1], NumThreads[2], getExecutionModeName());
536330d8983SJohannes Doerfert   return printLaunchInfoDetails(GenericDevice, KernelArgs, NumThreads,
537330d8983SJohannes Doerfert                                 NumBlocks);
538330d8983SJohannes Doerfert }
539330d8983SJohannes Doerfert 
540330d8983SJohannes Doerfert Error GenericKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
541330d8983SJohannes Doerfert                                               KernelArgsTy &KernelArgs,
54292376c3fSShilei Tian                                               uint32_t NumThreads[3],
54392376c3fSShilei Tian                                               uint32_t NumBlocks[3]) const {
544330d8983SJohannes Doerfert   return Plugin::success();
545330d8983SJohannes Doerfert }
546330d8983SJohannes Doerfert 
547330d8983SJohannes Doerfert Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
548330d8983SJohannes Doerfert                               ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
549330d8983SJohannes Doerfert                               AsyncInfoWrapperTy &AsyncInfoWrapper) const {
550330d8983SJohannes Doerfert   llvm::SmallVector<void *, 16> Args;
551330d8983SJohannes Doerfert   llvm::SmallVector<void *, 16> Ptrs;
552330d8983SJohannes Doerfert 
553330d8983SJohannes Doerfert   auto KernelLaunchEnvOrErr = getKernelLaunchEnvironment(
554330d8983SJohannes Doerfert       GenericDevice, KernelArgs.Version, AsyncInfoWrapper);
555330d8983SJohannes Doerfert   if (!KernelLaunchEnvOrErr)
556330d8983SJohannes Doerfert     return KernelLaunchEnvOrErr.takeError();
557330d8983SJohannes Doerfert 
55880525dfcSJohannes Doerfert   KernelLaunchParamsTy LaunchParams;
55980525dfcSJohannes Doerfert 
56080525dfcSJohannes Doerfert   // Kernel languages don't use indirection.
56180525dfcSJohannes Doerfert   if (KernelArgs.Flags.IsCUDA) {
56280525dfcSJohannes Doerfert     LaunchParams =
56380525dfcSJohannes Doerfert         *reinterpret_cast<KernelLaunchParamsTy *>(KernelArgs.ArgPtrs);
56480525dfcSJohannes Doerfert   } else {
56580525dfcSJohannes Doerfert     LaunchParams =
56680525dfcSJohannes Doerfert         prepareArgs(GenericDevice, ArgPtrs, ArgOffsets, KernelArgs.NumArgs,
56780525dfcSJohannes Doerfert                     Args, Ptrs, *KernelLaunchEnvOrErr);
56880525dfcSJohannes Doerfert   }
569330d8983SJohannes Doerfert 
57092376c3fSShilei Tian   uint32_t NumThreads[3] = {KernelArgs.ThreadLimit[0],
57192376c3fSShilei Tian                             KernelArgs.ThreadLimit[1],
57292376c3fSShilei Tian                             KernelArgs.ThreadLimit[2]};
57392376c3fSShilei Tian   uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
57492376c3fSShilei Tian                            KernelArgs.NumTeams[2]};
57592376c3fSShilei Tian   if (!IsBareKernel) {
57692376c3fSShilei Tian     NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
57792376c3fSShilei Tian     NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
57892376c3fSShilei Tian                                 NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
57992376c3fSShilei Tian   }
580330d8983SJohannes Doerfert 
581330d8983SJohannes Doerfert   // Record the kernel description after we modified the argument count and num
582330d8983SJohannes Doerfert   // blocks/threads.
583f42f57b5SJoseph Huber   RecordReplayTy &RecordReplay = GenericDevice.Plugin.getRecordReplay();
584330d8983SJohannes Doerfert   if (RecordReplay.isRecording()) {
585330d8983SJohannes Doerfert     RecordReplay.saveImage(getName(), getImage());
586330d8983SJohannes Doerfert     RecordReplay.saveKernelInput(getName(), getImage());
58754b5c76dSJohannes Doerfert     RecordReplay.saveKernelDescr(getName(), LaunchParams, KernelArgs.NumArgs,
58892376c3fSShilei Tian                                  NumBlocks[0], NumThreads[0],
58992376c3fSShilei Tian                                  KernelArgs.Tripcount);
590330d8983SJohannes Doerfert   }
591330d8983SJohannes Doerfert 
592330d8983SJohannes Doerfert   if (auto Err =
593330d8983SJohannes Doerfert           printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
594330d8983SJohannes Doerfert     return Err;
595330d8983SJohannes Doerfert 
596330d8983SJohannes Doerfert   return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
59754b5c76dSJohannes Doerfert                     LaunchParams, AsyncInfoWrapper);
598330d8983SJohannes Doerfert }
599330d8983SJohannes Doerfert 
60054b5c76dSJohannes Doerfert KernelLaunchParamsTy GenericKernelTy::prepareArgs(
601330d8983SJohannes Doerfert     GenericDeviceTy &GenericDevice, void **ArgPtrs, ptrdiff_t *ArgOffsets,
602330d8983SJohannes Doerfert     uint32_t &NumArgs, llvm::SmallVectorImpl<void *> &Args,
603330d8983SJohannes Doerfert     llvm::SmallVectorImpl<void *> &Ptrs,
604330d8983SJohannes Doerfert     KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const {
605330d8983SJohannes Doerfert   uint32_t KLEOffset = !!KernelLaunchEnvironment;
606330d8983SJohannes Doerfert   NumArgs += KLEOffset;
607330d8983SJohannes Doerfert 
608330d8983SJohannes Doerfert   if (NumArgs == 0)
60954b5c76dSJohannes Doerfert     return KernelLaunchParamsTy{};
610330d8983SJohannes Doerfert 
611330d8983SJohannes Doerfert   Args.resize(NumArgs);
612330d8983SJohannes Doerfert   Ptrs.resize(NumArgs);
613330d8983SJohannes Doerfert 
614330d8983SJohannes Doerfert   if (KernelLaunchEnvironment) {
61554b5c76dSJohannes Doerfert     Args[0] = KernelLaunchEnvironment;
61654b5c76dSJohannes Doerfert     Ptrs[0] = &Args[0];
617330d8983SJohannes Doerfert   }
618330d8983SJohannes Doerfert 
61981d20d86SJoseph Huber   for (uint32_t I = KLEOffset; I < NumArgs; ++I) {
62054b5c76dSJohannes Doerfert     Args[I] =
621330d8983SJohannes Doerfert         (void *)((intptr_t)ArgPtrs[I - KLEOffset] + ArgOffsets[I - KLEOffset]);
62254b5c76dSJohannes Doerfert     Ptrs[I] = &Args[I];
623330d8983SJohannes Doerfert   }
62454b5c76dSJohannes Doerfert   return KernelLaunchParamsTy{sizeof(void *) * NumArgs, &Args[0], &Ptrs[0]};
625330d8983SJohannes Doerfert }
626330d8983SJohannes Doerfert 
627330d8983SJohannes Doerfert uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
628330d8983SJohannes Doerfert                                         uint32_t ThreadLimitClause[3]) const {
62992376c3fSShilei Tian   assert(!IsBareKernel && "bare kernel should not call this function");
630330d8983SJohannes Doerfert 
63192376c3fSShilei Tian   assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 &&
63292376c3fSShilei Tian          "Multi dimensional launch not supported yet.");
633330d8983SJohannes Doerfert 
634330d8983SJohannes Doerfert   if (ThreadLimitClause[0] > 0 && isGenericMode())
635330d8983SJohannes Doerfert     ThreadLimitClause[0] += GenericDevice.getWarpSize();
636330d8983SJohannes Doerfert 
637330d8983SJohannes Doerfert   return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)
638330d8983SJohannes Doerfert                                      ? ThreadLimitClause[0]
639330d8983SJohannes Doerfert                                      : PreferredNumThreads);
640330d8983SJohannes Doerfert }
641330d8983SJohannes Doerfert 
64292376c3fSShilei Tian uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
643330d8983SJohannes Doerfert                                        uint32_t NumTeamsClause[3],
644330d8983SJohannes Doerfert                                        uint64_t LoopTripCount,
645330d8983SJohannes Doerfert                                        uint32_t &NumThreads,
646330d8983SJohannes Doerfert                                        bool IsNumThreadsFromUser) const {
64792376c3fSShilei Tian   assert(!IsBareKernel && "bare kernel should not call this function");
648330d8983SJohannes Doerfert 
64992376c3fSShilei Tian   assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 &&
65092376c3fSShilei Tian          "Multi dimensional launch not supported yet.");
651330d8983SJohannes Doerfert 
652330d8983SJohannes Doerfert   if (NumTeamsClause[0] > 0) {
653330d8983SJohannes Doerfert     // TODO: We need to honor any value and consequently allow more than the
654330d8983SJohannes Doerfert     // block limit. For this we might need to start multiple kernels or let the
655330d8983SJohannes Doerfert     // blocks start again until the requested number has been started.
656330d8983SJohannes Doerfert     return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit());
657330d8983SJohannes Doerfert   }
658330d8983SJohannes Doerfert 
659330d8983SJohannes Doerfert   uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks();
660330d8983SJohannes Doerfert   uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max();
661330d8983SJohannes Doerfert   if (LoopTripCount > 0) {
662330d8983SJohannes Doerfert     if (isSPMDMode()) {
663330d8983SJohannes Doerfert       // We have a combined construct, i.e. `target teams distribute
664330d8983SJohannes Doerfert       // parallel for [simd]`. We launch so many teams so that each thread
665330d8983SJohannes Doerfert       // will execute one iteration of the loop; rounded up to the nearest
666330d8983SJohannes Doerfert       // integer. However, if that results in too few teams, we artificially
667330d8983SJohannes Doerfert       // reduce the thread count per team to increase the outer parallelism.
668330d8983SJohannes Doerfert       auto MinThreads = GenericDevice.getMinThreadsForLowTripCountLoop();
669330d8983SJohannes Doerfert       MinThreads = std::min(MinThreads, NumThreads);
670330d8983SJohannes Doerfert 
671330d8983SJohannes Doerfert       // Honor the thread_limit clause; only lower the number of threads.
672330d8983SJohannes Doerfert       [[maybe_unused]] auto OldNumThreads = NumThreads;
673330d8983SJohannes Doerfert       if (LoopTripCount >= DefaultNumBlocks * NumThreads ||
674330d8983SJohannes Doerfert           IsNumThreadsFromUser) {
675330d8983SJohannes Doerfert         // Enough parallelism for teams and threads.
676330d8983SJohannes Doerfert         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
677330d8983SJohannes Doerfert         assert(IsNumThreadsFromUser ||
678330d8983SJohannes Doerfert                TripCountNumBlocks >= DefaultNumBlocks &&
679330d8983SJohannes Doerfert                    "Expected sufficient outer parallelism.");
680330d8983SJohannes Doerfert       } else if (LoopTripCount >= DefaultNumBlocks * MinThreads) {
681330d8983SJohannes Doerfert         // Enough parallelism for teams, limit threads.
682330d8983SJohannes Doerfert 
683330d8983SJohannes Doerfert         // This case is hard; for now, we force "full warps":
684330d8983SJohannes Doerfert         // First, compute a thread count assuming DefaultNumBlocks.
685330d8983SJohannes Doerfert         auto NumThreadsDefaultBlocks =
686330d8983SJohannes Doerfert             (LoopTripCount + DefaultNumBlocks - 1) / DefaultNumBlocks;
687330d8983SJohannes Doerfert         // Now get a power of two that is larger or equal.
688330d8983SJohannes Doerfert         auto NumThreadsDefaultBlocksP2 =
689330d8983SJohannes Doerfert             llvm::PowerOf2Ceil(NumThreadsDefaultBlocks);
690330d8983SJohannes Doerfert         // Do not increase a thread limit given be the user.
691330d8983SJohannes Doerfert         NumThreads = std::min(NumThreads, uint32_t(NumThreadsDefaultBlocksP2));
692330d8983SJohannes Doerfert         assert(NumThreads >= MinThreads &&
693330d8983SJohannes Doerfert                "Expected sufficient inner parallelism.");
694330d8983SJohannes Doerfert         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
695330d8983SJohannes Doerfert       } else {
696330d8983SJohannes Doerfert         // Not enough parallelism for teams and threads, limit both.
697330d8983SJohannes Doerfert         NumThreads = std::min(NumThreads, MinThreads);
698330d8983SJohannes Doerfert         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
699330d8983SJohannes Doerfert       }
700330d8983SJohannes Doerfert 
701330d8983SJohannes Doerfert       assert(NumThreads * TripCountNumBlocks >= LoopTripCount &&
702330d8983SJohannes Doerfert              "Expected sufficient parallelism");
703330d8983SJohannes Doerfert       assert(OldNumThreads >= NumThreads &&
704330d8983SJohannes Doerfert              "Number of threads cannot be increased!");
705330d8983SJohannes Doerfert     } else {
706330d8983SJohannes Doerfert       assert((isGenericMode() || isGenericSPMDMode()) &&
707330d8983SJohannes Doerfert              "Unexpected execution mode!");
708330d8983SJohannes Doerfert       // If we reach this point, then we have a non-combined construct, i.e.
709330d8983SJohannes Doerfert       // `teams distribute` with a nested `parallel for` and each team is
710330d8983SJohannes Doerfert       // assigned one iteration of the `distribute` loop. E.g.:
711330d8983SJohannes Doerfert       //
712330d8983SJohannes Doerfert       // #pragma omp target teams distribute
713330d8983SJohannes Doerfert       // for(...loop_tripcount...) {
714330d8983SJohannes Doerfert       //   #pragma omp parallel for
715330d8983SJohannes Doerfert       //   for(...) {}
716330d8983SJohannes Doerfert       // }
717330d8983SJohannes Doerfert       //
718330d8983SJohannes Doerfert       // Threads within a team will execute the iterations of the `parallel`
719330d8983SJohannes Doerfert       // loop.
720330d8983SJohannes Doerfert       TripCountNumBlocks = LoopTripCount;
721330d8983SJohannes Doerfert     }
722330d8983SJohannes Doerfert   }
723597d2f76STim Gymnich 
724597d2f76STim Gymnich   uint32_t PreferredNumBlocks = TripCountNumBlocks;
725330d8983SJohannes Doerfert   // If the loops are long running we rather reuse blocks than spawn too many.
726597d2f76STim Gymnich   if (GenericDevice.getReuseBlocksForHighTripCount())
727597d2f76STim Gymnich     PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks);
728330d8983SJohannes Doerfert   return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
729330d8983SJohannes Doerfert }
730330d8983SJohannes Doerfert 
731330d8983SJohannes Doerfert GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
732330d8983SJohannes Doerfert                                  int32_t NumDevices,
733330d8983SJohannes Doerfert                                  const llvm::omp::GV &OMPGridValues)
734330d8983SJohannes Doerfert     : Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
735330d8983SJohannes Doerfert       OMP_NumTeams("OMP_NUM_TEAMS"),
736330d8983SJohannes Doerfert       OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
737330d8983SJohannes Doerfert       OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
738330d8983SJohannes Doerfert       OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
739330d8983SJohannes Doerfert       // Do not initialize the following two envars since they depend on the
740330d8983SJohannes Doerfert       // device initialization. These cannot be consulted until the device is
741330d8983SJohannes Doerfert       // initialized correctly. We intialize them in GenericDeviceTy::init().
742330d8983SJohannes Doerfert       OMPX_TargetStackSize(), OMPX_TargetHeapSize(),
743330d8983SJohannes Doerfert       // By default, the initial number of streams and events is 1.
744330d8983SJohannes Doerfert       OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 1),
745330d8983SJohannes Doerfert       OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 1),
746330d8983SJohannes Doerfert       DeviceId(DeviceId), GridValues(OMPGridValues),
747330d8983SJohannes Doerfert       PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
748330d8983SJohannes Doerfert       PinnedAllocs(*this), RPCServer(nullptr) {
749330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
750330d8983SJohannes Doerfert   OmptInitialized.store(false);
751330d8983SJohannes Doerfert   // Bind the callbacks to this device's member functions
752330d8983SJohannes Doerfert #define bindOmptCallback(Name, Type, Code)                                     \
753330d8983SJohannes Doerfert   if (ompt::Initialized && ompt::lookupCallbackByCode) {                       \
754330d8983SJohannes Doerfert     ompt::lookupCallbackByCode((ompt_callbacks_t)(Code),                       \
755330d8983SJohannes Doerfert                                ((ompt_callback_t *)&(Name##_fn)));             \
756330d8983SJohannes Doerfert     DP("OMPT: class bound %s=%p\n", #Name, ((void *)(uint64_t)Name##_fn));     \
757330d8983SJohannes Doerfert   }
758330d8983SJohannes Doerfert 
759330d8983SJohannes Doerfert   FOREACH_OMPT_DEVICE_EVENT(bindOmptCallback);
760330d8983SJohannes Doerfert #undef bindOmptCallback
761330d8983SJohannes Doerfert 
762330d8983SJohannes Doerfert #endif
763330d8983SJohannes Doerfert }
764330d8983SJohannes Doerfert 
765330d8983SJohannes Doerfert Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
766330d8983SJohannes Doerfert   if (auto Err = initImpl(Plugin))
767330d8983SJohannes Doerfert     return Err;
768330d8983SJohannes Doerfert 
769330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
770330d8983SJohannes Doerfert   if (ompt::Initialized) {
771330d8983SJohannes Doerfert     bool ExpectedStatus = false;
772330d8983SJohannes Doerfert     if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true))
773435aa766SJoseph Huber       performOmptCallback(device_initialize, Plugin.getUserId(DeviceId),
774330d8983SJohannes Doerfert                           /*type=*/getComputeUnitKind().c_str(),
775330d8983SJohannes Doerfert                           /*device=*/reinterpret_cast<ompt_device_t *>(this),
776330d8983SJohannes Doerfert                           /*lookup=*/ompt::lookupCallbackByName,
777330d8983SJohannes Doerfert                           /*documentation=*/nullptr);
778330d8983SJohannes Doerfert   }
779330d8983SJohannes Doerfert #endif
780330d8983SJohannes Doerfert 
781330d8983SJohannes Doerfert   // Read and reinitialize the envars that depend on the device initialization.
782330d8983SJohannes Doerfert   // Notice these two envars may change the stack size and heap size of the
783330d8983SJohannes Doerfert   // device, so they need the device properly initialized.
784330d8983SJohannes Doerfert   auto StackSizeEnvarOrErr = UInt64Envar::create(
785330d8983SJohannes Doerfert       "LIBOMPTARGET_STACK_SIZE",
786330d8983SJohannes Doerfert       [this](uint64_t &V) -> Error { return getDeviceStackSize(V); },
787330d8983SJohannes Doerfert       [this](uint64_t V) -> Error { return setDeviceStackSize(V); });
788330d8983SJohannes Doerfert   if (!StackSizeEnvarOrErr)
789330d8983SJohannes Doerfert     return StackSizeEnvarOrErr.takeError();
790330d8983SJohannes Doerfert   OMPX_TargetStackSize = std::move(*StackSizeEnvarOrErr);
791330d8983SJohannes Doerfert 
792330d8983SJohannes Doerfert   auto HeapSizeEnvarOrErr = UInt64Envar::create(
793330d8983SJohannes Doerfert       "LIBOMPTARGET_HEAP_SIZE",
794330d8983SJohannes Doerfert       [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
795330d8983SJohannes Doerfert       [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
796330d8983SJohannes Doerfert   if (!HeapSizeEnvarOrErr)
797330d8983SJohannes Doerfert     return HeapSizeEnvarOrErr.takeError();
798330d8983SJohannes Doerfert   OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr);
799330d8983SJohannes Doerfert 
800330d8983SJohannes Doerfert   // Update the maximum number of teams and threads after the device
801330d8983SJohannes Doerfert   // initialization sets the corresponding hardware limit.
802330d8983SJohannes Doerfert   if (OMP_NumTeams > 0)
803330d8983SJohannes Doerfert     GridValues.GV_Max_Teams =
804330d8983SJohannes Doerfert         std::min(GridValues.GV_Max_Teams, uint32_t(OMP_NumTeams));
805330d8983SJohannes Doerfert 
806330d8983SJohannes Doerfert   if (OMP_TeamsThreadLimit > 0)
807330d8983SJohannes Doerfert     GridValues.GV_Max_WG_Size =
808330d8983SJohannes Doerfert         std::min(GridValues.GV_Max_WG_Size, uint32_t(OMP_TeamsThreadLimit));
809330d8983SJohannes Doerfert 
810330d8983SJohannes Doerfert   // Enable the memory manager if required.
811330d8983SJohannes Doerfert   auto [ThresholdMM, EnableMM] = MemoryManagerTy::getSizeThresholdFromEnv();
812330d8983SJohannes Doerfert   if (EnableMM)
813330d8983SJohannes Doerfert     MemoryManager = new MemoryManagerTy(*this, ThresholdMM);
814330d8983SJohannes Doerfert 
815330d8983SJohannes Doerfert   return Plugin::success();
816330d8983SJohannes Doerfert }
817330d8983SJohannes Doerfert 
818330d8983SJohannes Doerfert Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
819330d8983SJohannes Doerfert   for (DeviceImageTy *Image : LoadedImages)
820330d8983SJohannes Doerfert     if (auto Err = callGlobalDestructors(Plugin, *Image))
821330d8983SJohannes Doerfert       return Err;
822330d8983SJohannes Doerfert 
823330d8983SJohannes Doerfert   if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
824330d8983SJohannes Doerfert     GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
825330d8983SJohannes Doerfert     for (auto *Image : LoadedImages) {
826330d8983SJohannes Doerfert       DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
827330d8983SJohannes Doerfert       GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
828330d8983SJohannes Doerfert                              sizeof(DeviceMemoryPoolTrackingTy),
829330d8983SJohannes Doerfert                              &ImageDeviceMemoryPoolTracking);
830330d8983SJohannes Doerfert       if (auto Err =
831330d8983SJohannes Doerfert               GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
832330d8983SJohannes Doerfert         consumeError(std::move(Err));
833330d8983SJohannes Doerfert         continue;
834330d8983SJohannes Doerfert       }
835330d8983SJohannes Doerfert       DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
836330d8983SJohannes Doerfert     }
837330d8983SJohannes Doerfert 
838330d8983SJohannes Doerfert     // TODO: Write this by default into a file.
839330d8983SJohannes Doerfert     printf("\n\n|-----------------------\n"
840330d8983SJohannes Doerfert            "| Device memory tracker:\n"
841330d8983SJohannes Doerfert            "|-----------------------\n"
842330d8983SJohannes Doerfert            "| #Allocations: %lu\n"
843330d8983SJohannes Doerfert            "| Byes allocated: %lu\n"
844330d8983SJohannes Doerfert            "| Minimal allocation: %lu\n"
845330d8983SJohannes Doerfert            "| Maximal allocation: %lu\n"
846330d8983SJohannes Doerfert            "|-----------------------\n\n\n",
847330d8983SJohannes Doerfert            DeviceMemoryPoolTracking.NumAllocations,
848330d8983SJohannes Doerfert            DeviceMemoryPoolTracking.AllocationTotal,
849330d8983SJohannes Doerfert            DeviceMemoryPoolTracking.AllocationMin,
850330d8983SJohannes Doerfert            DeviceMemoryPoolTracking.AllocationMax);
851330d8983SJohannes Doerfert   }
852330d8983SJohannes Doerfert 
853fde2d23eSEthan Luis McDonough   for (auto *Image : LoadedImages) {
854fde2d23eSEthan Luis McDonough     GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
855fde2d23eSEthan Luis McDonough     if (!Handler.hasProfilingGlobals(*this, *Image))
856fde2d23eSEthan Luis McDonough       continue;
857fde2d23eSEthan Luis McDonough 
858fde2d23eSEthan Luis McDonough     GPUProfGlobals profdata;
859fde2d23eSEthan Luis McDonough     auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
860fde2d23eSEthan Luis McDonough     if (!ProfOrErr)
861fde2d23eSEthan Luis McDonough       return ProfOrErr.takeError();
862fde2d23eSEthan Luis McDonough 
863fde2d23eSEthan Luis McDonough     // TODO: write data to profiling file
864fde2d23eSEthan Luis McDonough     ProfOrErr->dump();
865fde2d23eSEthan Luis McDonough   }
866fde2d23eSEthan Luis McDonough 
867330d8983SJohannes Doerfert   // Delete the memory manager before deinitializing the device. Otherwise,
868330d8983SJohannes Doerfert   // we may delete device allocations after the device is deinitialized.
869330d8983SJohannes Doerfert   if (MemoryManager)
870330d8983SJohannes Doerfert     delete MemoryManager;
871330d8983SJohannes Doerfert   MemoryManager = nullptr;
872330d8983SJohannes Doerfert 
873f42f57b5SJoseph Huber   RecordReplayTy &RecordReplay = Plugin.getRecordReplay();
874330d8983SJohannes Doerfert   if (RecordReplay.isRecordingOrReplaying())
875330d8983SJohannes Doerfert     RecordReplay.deinit();
876330d8983SJohannes Doerfert 
877330d8983SJohannes Doerfert   if (RPCServer)
878330d8983SJohannes Doerfert     if (auto Err = RPCServer->deinitDevice(*this))
879330d8983SJohannes Doerfert       return Err;
880330d8983SJohannes Doerfert 
881330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
882330d8983SJohannes Doerfert   if (ompt::Initialized) {
883330d8983SJohannes Doerfert     bool ExpectedStatus = true;
884330d8983SJohannes Doerfert     if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false))
885435aa766SJoseph Huber       performOmptCallback(device_finalize, Plugin.getUserId(DeviceId));
886330d8983SJohannes Doerfert   }
887330d8983SJohannes Doerfert #endif
888330d8983SJohannes Doerfert 
889330d8983SJohannes Doerfert   return deinitImpl();
890330d8983SJohannes Doerfert }
891330d8983SJohannes Doerfert Expected<DeviceImageTy *>
892330d8983SJohannes Doerfert GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
893330d8983SJohannes Doerfert                             const __tgt_device_image *InputTgtImage) {
894330d8983SJohannes Doerfert   assert(InputTgtImage && "Expected non-null target image");
895330d8983SJohannes Doerfert   DP("Load data from image " DPxMOD "\n", DPxPTR(InputTgtImage->ImageStart));
896330d8983SJohannes Doerfert 
897330d8983SJohannes Doerfert   auto PostJITImageOrErr = Plugin.getJIT().process(*InputTgtImage, *this);
898330d8983SJohannes Doerfert   if (!PostJITImageOrErr) {
899330d8983SJohannes Doerfert     auto Err = PostJITImageOrErr.takeError();
900330d8983SJohannes Doerfert     REPORT("Failure to jit IR image %p on device %d: %s\n", InputTgtImage,
901330d8983SJohannes Doerfert            DeviceId, toString(std::move(Err)).data());
902330d8983SJohannes Doerfert     return nullptr;
903330d8983SJohannes Doerfert   }
904330d8983SJohannes Doerfert 
905330d8983SJohannes Doerfert   // Load the binary and allocate the image object. Use the next available id
906330d8983SJohannes Doerfert   // for the image id, which is the number of previously loaded images.
907330d8983SJohannes Doerfert   auto ImageOrErr =
908330d8983SJohannes Doerfert       loadBinaryImpl(PostJITImageOrErr.get(), LoadedImages.size());
909330d8983SJohannes Doerfert   if (!ImageOrErr)
910330d8983SJohannes Doerfert     return ImageOrErr.takeError();
911330d8983SJohannes Doerfert 
912330d8983SJohannes Doerfert   DeviceImageTy *Image = *ImageOrErr;
913330d8983SJohannes Doerfert   assert(Image != nullptr && "Invalid image");
914330d8983SJohannes Doerfert   if (InputTgtImage != PostJITImageOrErr.get())
915330d8983SJohannes Doerfert     Image->setTgtImageBitcode(InputTgtImage);
916330d8983SJohannes Doerfert 
917330d8983SJohannes Doerfert   // Add the image to list.
918330d8983SJohannes Doerfert   LoadedImages.push_back(Image);
919330d8983SJohannes Doerfert 
920330d8983SJohannes Doerfert   // Setup the device environment if needed.
921330d8983SJohannes Doerfert   if (auto Err = setupDeviceEnvironment(Plugin, *Image))
922330d8983SJohannes Doerfert     return std::move(Err);
923330d8983SJohannes Doerfert 
924330d8983SJohannes Doerfert   // Setup the global device memory pool if needed.
925f42f57b5SJoseph Huber   if (!Plugin.getRecordReplay().isReplaying() &&
926f42f57b5SJoseph Huber       shouldSetupDeviceMemoryPool()) {
927330d8983SJohannes Doerfert     uint64_t HeapSize;
928330d8983SJohannes Doerfert     auto SizeOrErr = getDeviceHeapSize(HeapSize);
929330d8983SJohannes Doerfert     if (SizeOrErr) {
930330d8983SJohannes Doerfert       REPORT("No global device memory pool due to error: %s\n",
931330d8983SJohannes Doerfert              toString(std::move(SizeOrErr)).data());
932330d8983SJohannes Doerfert     } else if (auto Err = setupDeviceMemoryPool(Plugin, *Image, HeapSize))
933330d8983SJohannes Doerfert       return std::move(Err);
934330d8983SJohannes Doerfert   }
935330d8983SJohannes Doerfert 
936330d8983SJohannes Doerfert   if (auto Err = setupRPCServer(Plugin, *Image))
937330d8983SJohannes Doerfert     return std::move(Err);
938330d8983SJohannes Doerfert 
939330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
940330d8983SJohannes Doerfert   if (ompt::Initialized) {
941330d8983SJohannes Doerfert     size_t Bytes =
94208533a3eSJohannes Doerfert         utils::getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart);
943330d8983SJohannes Doerfert     performOmptCallback(
944435aa766SJoseph Huber         device_load, Plugin.getUserId(DeviceId),
945330d8983SJohannes Doerfert         /*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr,
946330d8983SJohannes Doerfert         /*ImgSize=*/Bytes, /*HostAddr=*/InputTgtImage->ImageStart,
947330d8983SJohannes Doerfert         /*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0);
948330d8983SJohannes Doerfert   }
949330d8983SJohannes Doerfert #endif
950330d8983SJohannes Doerfert 
951330d8983SJohannes Doerfert   // Call any global constructors present on the device.
952330d8983SJohannes Doerfert   if (auto Err = callGlobalConstructors(Plugin, *Image))
953330d8983SJohannes Doerfert     return std::move(Err);
954330d8983SJohannes Doerfert 
955330d8983SJohannes Doerfert   // Return the pointer to the table of entries.
956330d8983SJohannes Doerfert   return Image;
957330d8983SJohannes Doerfert }
958330d8983SJohannes Doerfert 
959330d8983SJohannes Doerfert Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
960330d8983SJohannes Doerfert                                               DeviceImageTy &Image) {
961330d8983SJohannes Doerfert   // There are some plugins that do not need this step.
962330d8983SJohannes Doerfert   if (!shouldSetupDeviceEnvironment())
963330d8983SJohannes Doerfert     return Plugin::success();
964330d8983SJohannes Doerfert 
965330d8983SJohannes Doerfert   // Obtain a table mapping host function pointers to device function pointers.
966330d8983SJohannes Doerfert   auto CallTablePairOrErr = setupIndirectCallTable(Plugin, *this, Image);
967330d8983SJohannes Doerfert   if (!CallTablePairOrErr)
968330d8983SJohannes Doerfert     return CallTablePairOrErr.takeError();
969330d8983SJohannes Doerfert 
970330d8983SJohannes Doerfert   DeviceEnvironmentTy DeviceEnvironment;
971330d8983SJohannes Doerfert   DeviceEnvironment.DeviceDebugKind = OMPX_DebugKind;
972330d8983SJohannes Doerfert   DeviceEnvironment.NumDevices = Plugin.getNumDevices();
973330d8983SJohannes Doerfert   // TODO: The device ID used here is not the real device ID used by OpenMP.
974330d8983SJohannes Doerfert   DeviceEnvironment.DeviceNum = DeviceId;
975330d8983SJohannes Doerfert   DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize;
976330d8983SJohannes Doerfert   DeviceEnvironment.ClockFrequency = getClockFrequency();
977330d8983SJohannes Doerfert   DeviceEnvironment.IndirectCallTable =
978330d8983SJohannes Doerfert       reinterpret_cast<uintptr_t>(CallTablePairOrErr->first);
979330d8983SJohannes Doerfert   DeviceEnvironment.IndirectCallTableSize = CallTablePairOrErr->second;
980330d8983SJohannes Doerfert   DeviceEnvironment.HardwareParallelism = getHardwareParallelism();
981330d8983SJohannes Doerfert 
982330d8983SJohannes Doerfert   // Create the metainfo of the device environment global.
983330d8983SJohannes Doerfert   GlobalTy DevEnvGlobal("__omp_rtl_device_environment",
984330d8983SJohannes Doerfert                         sizeof(DeviceEnvironmentTy), &DeviceEnvironment);
985330d8983SJohannes Doerfert 
986330d8983SJohannes Doerfert   // Write device environment values to the device.
987330d8983SJohannes Doerfert   GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
988330d8983SJohannes Doerfert   if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) {
989330d8983SJohannes Doerfert     DP("Missing symbol %s, continue execution anyway.\n",
990330d8983SJohannes Doerfert        DevEnvGlobal.getName().data());
991330d8983SJohannes Doerfert     consumeError(std::move(Err));
992330d8983SJohannes Doerfert   }
993330d8983SJohannes Doerfert   return Plugin::success();
994330d8983SJohannes Doerfert }
995330d8983SJohannes Doerfert 
996330d8983SJohannes Doerfert Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
997330d8983SJohannes Doerfert                                              DeviceImageTy &Image,
998330d8983SJohannes Doerfert                                              uint64_t PoolSize) {
999330d8983SJohannes Doerfert   // Free the old pool, if any.
1000330d8983SJohannes Doerfert   if (DeviceMemoryPool.Ptr) {
1001330d8983SJohannes Doerfert     if (auto Err = dataDelete(DeviceMemoryPool.Ptr,
1002330d8983SJohannes Doerfert                               TargetAllocTy::TARGET_ALLOC_DEVICE))
1003330d8983SJohannes Doerfert       return Err;
1004330d8983SJohannes Doerfert   }
1005330d8983SJohannes Doerfert 
1006330d8983SJohannes Doerfert   DeviceMemoryPool.Size = PoolSize;
1007330d8983SJohannes Doerfert   auto AllocOrErr = dataAlloc(PoolSize, /*HostPtr=*/nullptr,
1008330d8983SJohannes Doerfert                               TargetAllocTy::TARGET_ALLOC_DEVICE);
1009330d8983SJohannes Doerfert   if (AllocOrErr) {
1010330d8983SJohannes Doerfert     DeviceMemoryPool.Ptr = *AllocOrErr;
1011330d8983SJohannes Doerfert   } else {
1012330d8983SJohannes Doerfert     auto Err = AllocOrErr.takeError();
1013330d8983SJohannes Doerfert     REPORT("Failure to allocate device memory for global memory pool: %s\n",
1014330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1015330d8983SJohannes Doerfert     DeviceMemoryPool.Ptr = nullptr;
1016330d8983SJohannes Doerfert     DeviceMemoryPool.Size = 0;
1017330d8983SJohannes Doerfert   }
1018330d8983SJohannes Doerfert 
1019330d8983SJohannes Doerfert   // Create the metainfo of the device environment global.
1020330d8983SJohannes Doerfert   GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
1021330d8983SJohannes Doerfert   if (!GHandler.isSymbolInImage(*this, Image,
1022330d8983SJohannes Doerfert                                 "__omp_rtl_device_memory_pool_tracker")) {
1023330d8983SJohannes Doerfert     DP("Skip the memory pool as there is no tracker symbol in the image.");
1024330d8983SJohannes Doerfert     return Error::success();
1025330d8983SJohannes Doerfert   }
1026330d8983SJohannes Doerfert 
1027330d8983SJohannes Doerfert   GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
1028330d8983SJohannes Doerfert                          sizeof(DeviceMemoryPoolTrackingTy),
1029330d8983SJohannes Doerfert                          &DeviceMemoryPoolTracking);
1030330d8983SJohannes Doerfert   if (auto Err = GHandler.writeGlobalToDevice(*this, Image, TrackerGlobal))
1031330d8983SJohannes Doerfert     return Err;
1032330d8983SJohannes Doerfert 
1033330d8983SJohannes Doerfert   // Create the metainfo of the device environment global.
1034330d8983SJohannes Doerfert   GlobalTy DevEnvGlobal("__omp_rtl_device_memory_pool",
1035330d8983SJohannes Doerfert                         sizeof(DeviceMemoryPoolTy), &DeviceMemoryPool);
1036330d8983SJohannes Doerfert 
1037330d8983SJohannes Doerfert   // Write device environment values to the device.
1038330d8983SJohannes Doerfert   return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
1039330d8983SJohannes Doerfert }
1040330d8983SJohannes Doerfert 
1041330d8983SJohannes Doerfert Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
1042330d8983SJohannes Doerfert                                       DeviceImageTy &Image) {
1043330d8983SJohannes Doerfert   // The plugin either does not need an RPC server or it is unavailible.
1044330d8983SJohannes Doerfert   if (!shouldSetupRPCServer())
1045330d8983SJohannes Doerfert     return Plugin::success();
1046330d8983SJohannes Doerfert 
1047330d8983SJohannes Doerfert   // Check if this device needs to run an RPC server.
1048330d8983SJohannes Doerfert   RPCServerTy &Server = Plugin.getRPCServer();
1049330d8983SJohannes Doerfert   auto UsingOrErr =
1050330d8983SJohannes Doerfert       Server.isDeviceUsingRPC(*this, Plugin.getGlobalHandler(), Image);
1051330d8983SJohannes Doerfert   if (!UsingOrErr)
1052330d8983SJohannes Doerfert     return UsingOrErr.takeError();
1053330d8983SJohannes Doerfert 
1054330d8983SJohannes Doerfert   if (!UsingOrErr.get())
1055330d8983SJohannes Doerfert     return Plugin::success();
1056330d8983SJohannes Doerfert 
1057330d8983SJohannes Doerfert   if (auto Err = Server.initDevice(*this, Plugin.getGlobalHandler(), Image))
1058330d8983SJohannes Doerfert     return Err;
1059330d8983SJohannes Doerfert 
1060134401deSJoseph Huber   if (auto Err = Server.startThread())
1061134401deSJoseph Huber     return Err;
1062134401deSJoseph Huber 
1063330d8983SJohannes Doerfert   RPCServer = &Server;
1064330d8983SJohannes Doerfert   DP("Running an RPC server on device %d\n", getDeviceId());
1065330d8983SJohannes Doerfert   return Plugin::success();
1066330d8983SJohannes Doerfert }
1067330d8983SJohannes Doerfert 
1068330d8983SJohannes Doerfert Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,
1069330d8983SJohannes Doerfert                                          size_t Size, bool ExternallyLocked) {
1070330d8983SJohannes Doerfert   // Insert the new entry into the map.
1071330d8983SJohannes Doerfert   auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked});
1072330d8983SJohannes Doerfert   if (!Res.second)
1073330d8983SJohannes Doerfert     return Plugin::error("Cannot insert locked buffer entry");
1074330d8983SJohannes Doerfert 
1075330d8983SJohannes Doerfert   // Check whether the next entry overlaps with the inserted entry.
1076330d8983SJohannes Doerfert   auto It = std::next(Res.first);
1077330d8983SJohannes Doerfert   if (It == Allocs.end())
1078330d8983SJohannes Doerfert     return Plugin::success();
1079330d8983SJohannes Doerfert 
1080330d8983SJohannes Doerfert   const EntryTy *NextEntry = &(*It);
1081330d8983SJohannes Doerfert   if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size))
1082330d8983SJohannes Doerfert     return Plugin::error("Partial overlapping not allowed in locked buffers");
1083330d8983SJohannes Doerfert 
1084330d8983SJohannes Doerfert   return Plugin::success();
1085330d8983SJohannes Doerfert }
1086330d8983SJohannes Doerfert 
1087330d8983SJohannes Doerfert Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) {
1088330d8983SJohannes Doerfert   // Erase the existing entry. Notice this requires an additional map lookup,
1089330d8983SJohannes Doerfert   // but this should not be a performance issue. Using iterators would make
1090330d8983SJohannes Doerfert   // the code more difficult to read.
1091330d8983SJohannes Doerfert   size_t Erased = Allocs.erase({Entry.HstPtr});
1092330d8983SJohannes Doerfert   if (!Erased)
1093330d8983SJohannes Doerfert     return Plugin::error("Cannot erase locked buffer entry");
1094330d8983SJohannes Doerfert   return Plugin::success();
1095330d8983SJohannes Doerfert }
1096330d8983SJohannes Doerfert 
1097330d8983SJohannes Doerfert Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry,
1098330d8983SJohannes Doerfert                                               void *HstPtr, size_t Size) {
1099330d8983SJohannes Doerfert   if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size))
1100330d8983SJohannes Doerfert     return Plugin::error("Partial overlapping not allowed in locked buffers");
1101330d8983SJohannes Doerfert 
1102330d8983SJohannes Doerfert   ++Entry.References;
1103330d8983SJohannes Doerfert   return Plugin::success();
1104330d8983SJohannes Doerfert }
1105330d8983SJohannes Doerfert 
1106330d8983SJohannes Doerfert Expected<bool> PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) {
1107330d8983SJohannes Doerfert   if (Entry.References == 0)
1108330d8983SJohannes Doerfert     return Plugin::error("Invalid number of references");
1109330d8983SJohannes Doerfert 
1110330d8983SJohannes Doerfert   // Return whether this was the last user.
1111330d8983SJohannes Doerfert   return (--Entry.References == 0);
1112330d8983SJohannes Doerfert }
1113330d8983SJohannes Doerfert 
1114330d8983SJohannes Doerfert Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr,
1115330d8983SJohannes Doerfert                                                 void *DevAccessiblePtr,
1116330d8983SJohannes Doerfert                                                 size_t Size) {
1117330d8983SJohannes Doerfert   assert(HstPtr && "Invalid pointer");
1118330d8983SJohannes Doerfert   assert(DevAccessiblePtr && "Invalid pointer");
1119330d8983SJohannes Doerfert   assert(Size && "Invalid size");
1120330d8983SJohannes Doerfert 
1121330d8983SJohannes Doerfert   std::lock_guard<std::shared_mutex> Lock(Mutex);
1122330d8983SJohannes Doerfert 
1123330d8983SJohannes Doerfert   // No pinned allocation should intersect.
1124330d8983SJohannes Doerfert   const EntryTy *Entry = findIntersecting(HstPtr);
1125330d8983SJohannes Doerfert   if (Entry)
1126330d8983SJohannes Doerfert     return Plugin::error("Cannot insert entry due to an existing one");
1127330d8983SJohannes Doerfert 
1128330d8983SJohannes Doerfert   // Now insert the new entry.
1129330d8983SJohannes Doerfert   return insertEntry(HstPtr, DevAccessiblePtr, Size);
1130330d8983SJohannes Doerfert }
1131330d8983SJohannes Doerfert 
1132330d8983SJohannes Doerfert Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {
1133330d8983SJohannes Doerfert   assert(HstPtr && "Invalid pointer");
1134330d8983SJohannes Doerfert 
1135330d8983SJohannes Doerfert   std::lock_guard<std::shared_mutex> Lock(Mutex);
1136330d8983SJohannes Doerfert 
1137330d8983SJohannes Doerfert   const EntryTy *Entry = findIntersecting(HstPtr);
1138330d8983SJohannes Doerfert   if (!Entry)
1139330d8983SJohannes Doerfert     return Plugin::error("Cannot find locked buffer");
1140330d8983SJohannes Doerfert 
1141330d8983SJohannes Doerfert   // The address in the entry should be the same we are unregistering.
1142330d8983SJohannes Doerfert   if (Entry->HstPtr != HstPtr)
1143330d8983SJohannes Doerfert     return Plugin::error("Unexpected host pointer in locked buffer entry");
1144330d8983SJohannes Doerfert 
1145330d8983SJohannes Doerfert   // Unregister from the entry.
1146330d8983SJohannes Doerfert   auto LastUseOrErr = unregisterEntryUse(*Entry);
1147330d8983SJohannes Doerfert   if (!LastUseOrErr)
1148330d8983SJohannes Doerfert     return LastUseOrErr.takeError();
1149330d8983SJohannes Doerfert 
1150330d8983SJohannes Doerfert   // There should be no other references to the pinned allocation.
1151330d8983SJohannes Doerfert   if (!(*LastUseOrErr))
1152330d8983SJohannes Doerfert     return Plugin::error("The locked buffer is still being used");
1153330d8983SJohannes Doerfert 
1154330d8983SJohannes Doerfert   // Erase the entry from the map.
1155330d8983SJohannes Doerfert   return eraseEntry(*Entry);
1156330d8983SJohannes Doerfert }
1157330d8983SJohannes Doerfert 
1158330d8983SJohannes Doerfert Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr,
1159330d8983SJohannes Doerfert                                                        size_t Size) {
1160330d8983SJohannes Doerfert   assert(HstPtr && "Invalid pointer");
1161330d8983SJohannes Doerfert   assert(Size && "Invalid size");
1162330d8983SJohannes Doerfert 
1163330d8983SJohannes Doerfert   std::lock_guard<std::shared_mutex> Lock(Mutex);
1164330d8983SJohannes Doerfert 
1165330d8983SJohannes Doerfert   const EntryTy *Entry = findIntersecting(HstPtr);
1166330d8983SJohannes Doerfert 
1167330d8983SJohannes Doerfert   if (Entry) {
1168330d8983SJohannes Doerfert     // An already registered intersecting buffer was found. Register a new use.
1169330d8983SJohannes Doerfert     if (auto Err = registerEntryUse(*Entry, HstPtr, Size))
1170330d8983SJohannes Doerfert       return std::move(Err);
1171330d8983SJohannes Doerfert 
1172330d8983SJohannes Doerfert     // Return the device accessible pointer with the correct offset.
117308533a3eSJohannes Doerfert     return utils::advancePtr(Entry->DevAccessiblePtr,
117408533a3eSJohannes Doerfert                              utils::getPtrDiff(HstPtr, Entry->HstPtr));
1175330d8983SJohannes Doerfert   }
1176330d8983SJohannes Doerfert 
1177330d8983SJohannes Doerfert   // No intersecting registered allocation found in the map. First, lock the
1178330d8983SJohannes Doerfert   // host buffer and retrieve the device accessible pointer.
1179330d8983SJohannes Doerfert   auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
1180330d8983SJohannes Doerfert   if (!DevAccessiblePtrOrErr)
1181330d8983SJohannes Doerfert     return DevAccessiblePtrOrErr.takeError();
1182330d8983SJohannes Doerfert 
1183330d8983SJohannes Doerfert   // Now insert the new entry into the map.
1184330d8983SJohannes Doerfert   if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size))
1185330d8983SJohannes Doerfert     return std::move(Err);
1186330d8983SJohannes Doerfert 
1187330d8983SJohannes Doerfert   // Return the device accessible pointer.
1188330d8983SJohannes Doerfert   return *DevAccessiblePtrOrErr;
1189330d8983SJohannes Doerfert }
1190330d8983SJohannes Doerfert 
1191330d8983SJohannes Doerfert Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) {
1192330d8983SJohannes Doerfert   assert(HstPtr && "Invalid pointer");
1193330d8983SJohannes Doerfert 
1194330d8983SJohannes Doerfert   std::lock_guard<std::shared_mutex> Lock(Mutex);
1195330d8983SJohannes Doerfert 
1196330d8983SJohannes Doerfert   const EntryTy *Entry = findIntersecting(HstPtr);
1197330d8983SJohannes Doerfert   if (!Entry)
1198330d8983SJohannes Doerfert     return Plugin::error("Cannot find locked buffer");
1199330d8983SJohannes Doerfert 
1200330d8983SJohannes Doerfert   // Unregister from the locked buffer. No need to do anything if there are
1201330d8983SJohannes Doerfert   // others using the allocation.
1202330d8983SJohannes Doerfert   auto LastUseOrErr = unregisterEntryUse(*Entry);
1203330d8983SJohannes Doerfert   if (!LastUseOrErr)
1204330d8983SJohannes Doerfert     return LastUseOrErr.takeError();
1205330d8983SJohannes Doerfert 
1206330d8983SJohannes Doerfert   // No need to do anything if there are others using the allocation.
1207330d8983SJohannes Doerfert   if (!(*LastUseOrErr))
1208330d8983SJohannes Doerfert     return Plugin::success();
1209330d8983SJohannes Doerfert 
1210330d8983SJohannes Doerfert   // This was the last user of the allocation. Unlock the original locked buffer
1211330d8983SJohannes Doerfert   // if it was locked by the plugin. Do not unlock it if it was locked by an
1212330d8983SJohannes Doerfert   // external entity. Unlock the buffer using the host pointer of the entry.
1213330d8983SJohannes Doerfert   if (!Entry->ExternallyLocked)
1214330d8983SJohannes Doerfert     if (auto Err = Device.dataUnlockImpl(Entry->HstPtr))
1215330d8983SJohannes Doerfert       return Err;
1216330d8983SJohannes Doerfert 
1217330d8983SJohannes Doerfert   // Erase the entry from the map.
1218330d8983SJohannes Doerfert   return eraseEntry(*Entry);
1219330d8983SJohannes Doerfert }
1220330d8983SJohannes Doerfert 
1221330d8983SJohannes Doerfert Error PinnedAllocationMapTy::lockMappedHostBuffer(void *HstPtr, size_t Size) {
1222330d8983SJohannes Doerfert   assert(HstPtr && "Invalid pointer");
1223330d8983SJohannes Doerfert   assert(Size && "Invalid size");
1224330d8983SJohannes Doerfert 
1225330d8983SJohannes Doerfert   std::lock_guard<std::shared_mutex> Lock(Mutex);
1226330d8983SJohannes Doerfert 
1227330d8983SJohannes Doerfert   // If previously registered, just register a new user on the entry.
1228330d8983SJohannes Doerfert   const EntryTy *Entry = findIntersecting(HstPtr);
1229330d8983SJohannes Doerfert   if (Entry)
1230330d8983SJohannes Doerfert     return registerEntryUse(*Entry, HstPtr, Size);
1231330d8983SJohannes Doerfert 
1232330d8983SJohannes Doerfert   size_t BaseSize;
1233330d8983SJohannes Doerfert   void *BaseHstPtr, *BaseDevAccessiblePtr;
1234330d8983SJohannes Doerfert 
1235330d8983SJohannes Doerfert   // Check if it was externally pinned by a vendor-specific API.
1236330d8983SJohannes Doerfert   auto IsPinnedOrErr = Device.isPinnedPtrImpl(HstPtr, BaseHstPtr,
1237330d8983SJohannes Doerfert                                               BaseDevAccessiblePtr, BaseSize);
1238330d8983SJohannes Doerfert   if (!IsPinnedOrErr)
1239330d8983SJohannes Doerfert     return IsPinnedOrErr.takeError();
1240330d8983SJohannes Doerfert 
1241330d8983SJohannes Doerfert   // If pinned, just insert the entry representing the whole pinned buffer.
1242330d8983SJohannes Doerfert   if (*IsPinnedOrErr)
1243330d8983SJohannes Doerfert     return insertEntry(BaseHstPtr, BaseDevAccessiblePtr, BaseSize,
1244330d8983SJohannes Doerfert                        /*Externallylocked=*/true);
1245330d8983SJohannes Doerfert 
1246330d8983SJohannes Doerfert   // Not externally pinned. Do nothing if locking of mapped buffers is disabled.
1247330d8983SJohannes Doerfert   if (!LockMappedBuffers)
1248330d8983SJohannes Doerfert     return Plugin::success();
1249330d8983SJohannes Doerfert 
1250330d8983SJohannes Doerfert   // Otherwise, lock the buffer and insert the new entry.
1251330d8983SJohannes Doerfert   auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
1252330d8983SJohannes Doerfert   if (!DevAccessiblePtrOrErr) {
1253330d8983SJohannes Doerfert     // Errors may be tolerated.
1254330d8983SJohannes Doerfert     if (!IgnoreLockMappedFailures)
1255330d8983SJohannes Doerfert       return DevAccessiblePtrOrErr.takeError();
1256330d8983SJohannes Doerfert 
1257330d8983SJohannes Doerfert     consumeError(DevAccessiblePtrOrErr.takeError());
1258330d8983SJohannes Doerfert     return Plugin::success();
1259330d8983SJohannes Doerfert   }
1260330d8983SJohannes Doerfert 
1261330d8983SJohannes Doerfert   return insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size);
1262330d8983SJohannes Doerfert }
1263330d8983SJohannes Doerfert 
1264330d8983SJohannes Doerfert Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) {
1265330d8983SJohannes Doerfert   assert(HstPtr && "Invalid pointer");
1266330d8983SJohannes Doerfert 
1267330d8983SJohannes Doerfert   std::lock_guard<std::shared_mutex> Lock(Mutex);
1268330d8983SJohannes Doerfert 
1269330d8983SJohannes Doerfert   // Check whether there is any intersecting entry.
1270330d8983SJohannes Doerfert   const EntryTy *Entry = findIntersecting(HstPtr);
1271330d8983SJohannes Doerfert 
1272330d8983SJohannes Doerfert   // No entry but automatic locking of mapped buffers is disabled, so
1273330d8983SJohannes Doerfert   // nothing to do.
1274330d8983SJohannes Doerfert   if (!Entry && !LockMappedBuffers)
1275330d8983SJohannes Doerfert     return Plugin::success();
1276330d8983SJohannes Doerfert 
1277330d8983SJohannes Doerfert   // No entry, automatic locking is enabled, but the locking may have failed, so
1278330d8983SJohannes Doerfert   // do nothing.
1279330d8983SJohannes Doerfert   if (!Entry && IgnoreLockMappedFailures)
1280330d8983SJohannes Doerfert     return Plugin::success();
1281330d8983SJohannes Doerfert 
1282330d8983SJohannes Doerfert   // No entry, but the automatic locking is enabled, so this is an error.
1283330d8983SJohannes Doerfert   if (!Entry)
1284330d8983SJohannes Doerfert     return Plugin::error("Locked buffer not found");
1285330d8983SJohannes Doerfert 
1286330d8983SJohannes Doerfert   // There is entry, so unregister a user and check whether it was the last one.
1287330d8983SJohannes Doerfert   auto LastUseOrErr = unregisterEntryUse(*Entry);
1288330d8983SJohannes Doerfert   if (!LastUseOrErr)
1289330d8983SJohannes Doerfert     return LastUseOrErr.takeError();
1290330d8983SJohannes Doerfert 
1291330d8983SJohannes Doerfert   // If it is not the last one, there is nothing to do.
1292330d8983SJohannes Doerfert   if (!(*LastUseOrErr))
1293330d8983SJohannes Doerfert     return Plugin::success();
1294330d8983SJohannes Doerfert 
1295330d8983SJohannes Doerfert   // Otherwise, if it was the last and the buffer was locked by the plugin,
1296330d8983SJohannes Doerfert   // unlock it.
1297330d8983SJohannes Doerfert   if (!Entry->ExternallyLocked)
1298330d8983SJohannes Doerfert     if (auto Err = Device.dataUnlockImpl(Entry->HstPtr))
1299330d8983SJohannes Doerfert       return Err;
1300330d8983SJohannes Doerfert 
1301330d8983SJohannes Doerfert   // Finally erase the entry from the map.
1302330d8983SJohannes Doerfert   return eraseEntry(*Entry);
1303330d8983SJohannes Doerfert }
1304330d8983SJohannes Doerfert 
1305330d8983SJohannes Doerfert Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
1306330d8983SJohannes Doerfert   if (!AsyncInfo || !AsyncInfo->Queue)
1307330d8983SJohannes Doerfert     return Plugin::error("Invalid async info queue");
1308330d8983SJohannes Doerfert 
1309330d8983SJohannes Doerfert   if (auto Err = synchronizeImpl(*AsyncInfo))
1310330d8983SJohannes Doerfert     return Err;
1311330d8983SJohannes Doerfert 
1312330d8983SJohannes Doerfert   for (auto *Ptr : AsyncInfo->AssociatedAllocations)
1313330d8983SJohannes Doerfert     if (auto Err = dataDelete(Ptr, TargetAllocTy::TARGET_ALLOC_DEVICE))
1314330d8983SJohannes Doerfert       return Err;
1315330d8983SJohannes Doerfert   AsyncInfo->AssociatedAllocations.clear();
1316330d8983SJohannes Doerfert 
1317330d8983SJohannes Doerfert   return Plugin::success();
1318330d8983SJohannes Doerfert }
1319330d8983SJohannes Doerfert 
1320330d8983SJohannes Doerfert Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
1321330d8983SJohannes Doerfert   if (!AsyncInfo || !AsyncInfo->Queue)
1322330d8983SJohannes Doerfert     return Plugin::error("Invalid async info queue");
1323330d8983SJohannes Doerfert 
1324330d8983SJohannes Doerfert   return queryAsyncImpl(*AsyncInfo);
1325330d8983SJohannes Doerfert }
1326330d8983SJohannes Doerfert 
1327330d8983SJohannes Doerfert Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
1328330d8983SJohannes Doerfert   return Plugin::error("Device does not suppport VA Management");
1329330d8983SJohannes Doerfert }
1330330d8983SJohannes Doerfert 
1331330d8983SJohannes Doerfert Error GenericDeviceTy::memoryVAUnMap(void *VAddr, size_t Size) {
1332330d8983SJohannes Doerfert   return Plugin::error("Device does not suppport VA Management");
1333330d8983SJohannes Doerfert }
1334330d8983SJohannes Doerfert 
1335330d8983SJohannes Doerfert Error GenericDeviceTy::getDeviceMemorySize(uint64_t &DSize) {
1336330d8983SJohannes Doerfert   return Plugin::error(
1337330d8983SJohannes Doerfert       "Mising getDeviceMemorySize impelmentation (required by RR-heuristic");
1338330d8983SJohannes Doerfert }
1339330d8983SJohannes Doerfert 
1340330d8983SJohannes Doerfert Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
1341330d8983SJohannes Doerfert                                             TargetAllocTy Kind) {
1342330d8983SJohannes Doerfert   void *Alloc = nullptr;
1343330d8983SJohannes Doerfert 
1344f42f57b5SJoseph Huber   if (Plugin.getRecordReplay().isRecordingOrReplaying())
1345f42f57b5SJoseph Huber     return Plugin.getRecordReplay().alloc(Size);
1346330d8983SJohannes Doerfert 
1347330d8983SJohannes Doerfert   switch (Kind) {
1348330d8983SJohannes Doerfert   case TARGET_ALLOC_DEFAULT:
1349330d8983SJohannes Doerfert   case TARGET_ALLOC_DEVICE_NON_BLOCKING:
1350330d8983SJohannes Doerfert   case TARGET_ALLOC_DEVICE:
1351330d8983SJohannes Doerfert     if (MemoryManager) {
1352330d8983SJohannes Doerfert       Alloc = MemoryManager->allocate(Size, HostPtr);
1353330d8983SJohannes Doerfert       if (!Alloc)
1354330d8983SJohannes Doerfert         return Plugin::error("Failed to allocate from memory manager");
1355330d8983SJohannes Doerfert       break;
1356330d8983SJohannes Doerfert     }
1357330d8983SJohannes Doerfert     [[fallthrough]];
1358330d8983SJohannes Doerfert   case TARGET_ALLOC_HOST:
1359330d8983SJohannes Doerfert   case TARGET_ALLOC_SHARED:
1360330d8983SJohannes Doerfert     Alloc = allocate(Size, HostPtr, Kind);
1361330d8983SJohannes Doerfert     if (!Alloc)
1362330d8983SJohannes Doerfert       return Plugin::error("Failed to allocate from device allocator");
1363330d8983SJohannes Doerfert   }
1364330d8983SJohannes Doerfert 
1365330d8983SJohannes Doerfert   // Report error if the memory manager or the device allocator did not return
1366330d8983SJohannes Doerfert   // any memory buffer.
1367330d8983SJohannes Doerfert   if (!Alloc)
1368330d8983SJohannes Doerfert     return Plugin::error("Invalid target data allocation kind or requested "
1369330d8983SJohannes Doerfert                          "allocator not implemented yet");
1370330d8983SJohannes Doerfert 
1371330d8983SJohannes Doerfert   // Register allocated buffer as pinned memory if the type is host memory.
1372330d8983SJohannes Doerfert   if (Kind == TARGET_ALLOC_HOST)
1373330d8983SJohannes Doerfert     if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size))
1374330d8983SJohannes Doerfert       return std::move(Err);
1375330d8983SJohannes Doerfert 
1376c95abe94SJohannes Doerfert   // Keep track of the allocation stack if we track allocation traces.
1377c95abe94SJohannes Doerfert   if (OMPX_TrackAllocationTraces) {
1378c95abe94SJohannes Doerfert     std::string StackTrace;
1379c95abe94SJohannes Doerfert     llvm::raw_string_ostream OS(StackTrace);
1380c95abe94SJohannes Doerfert     llvm::sys::PrintStackTrace(OS);
1381c95abe94SJohannes Doerfert 
1382c95abe94SJohannes Doerfert     AllocationTraceInfoTy *ATI = new AllocationTraceInfoTy();
1383c95abe94SJohannes Doerfert     ATI->AllocationTrace = std::move(StackTrace);
1384c95abe94SJohannes Doerfert     ATI->DevicePtr = Alloc;
1385c95abe94SJohannes Doerfert     ATI->HostPtr = HostPtr;
1386c95abe94SJohannes Doerfert     ATI->Size = Size;
1387c95abe94SJohannes Doerfert     ATI->Kind = Kind;
1388c95abe94SJohannes Doerfert 
1389c95abe94SJohannes Doerfert     auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
1390c95abe94SJohannes Doerfert     auto *&MapATI = (*AllocationTraceMap)[Alloc];
1391c95abe94SJohannes Doerfert     ATI->LastAllocationInfo = MapATI;
1392c95abe94SJohannes Doerfert     MapATI = ATI;
1393c95abe94SJohannes Doerfert   }
1394c95abe94SJohannes Doerfert 
1395330d8983SJohannes Doerfert   return Alloc;
1396330d8983SJohannes Doerfert }
1397330d8983SJohannes Doerfert 
1398330d8983SJohannes Doerfert Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
1399330d8983SJohannes Doerfert   // Free is a noop when recording or replaying.
1400f42f57b5SJoseph Huber   if (Plugin.getRecordReplay().isRecordingOrReplaying())
1401330d8983SJohannes Doerfert     return Plugin::success();
1402330d8983SJohannes Doerfert 
1403c95abe94SJohannes Doerfert   // Keep track of the deallocation stack if we track allocation traces.
1404c95abe94SJohannes Doerfert   if (OMPX_TrackAllocationTraces) {
1405c95abe94SJohannes Doerfert     AllocationTraceInfoTy *ATI = nullptr;
1406c95abe94SJohannes Doerfert     {
1407c95abe94SJohannes Doerfert       auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
1408c95abe94SJohannes Doerfert       ATI = (*AllocationTraceMap)[TgtPtr];
1409c95abe94SJohannes Doerfert     }
1410c95abe94SJohannes Doerfert 
1411c95abe94SJohannes Doerfert     std::string StackTrace;
1412c95abe94SJohannes Doerfert     llvm::raw_string_ostream OS(StackTrace);
1413c95abe94SJohannes Doerfert     llvm::sys::PrintStackTrace(OS);
1414c95abe94SJohannes Doerfert 
1415c95abe94SJohannes Doerfert     if (!ATI)
1416c95abe94SJohannes Doerfert       ErrorReporter::reportDeallocationOfNonAllocatedPtr(TgtPtr, Kind, ATI,
1417c95abe94SJohannes Doerfert                                                          StackTrace);
1418c95abe94SJohannes Doerfert 
1419c95abe94SJohannes Doerfert     // ATI is not null, thus we can lock it to inspect and modify it further.
1420c95abe94SJohannes Doerfert     std::lock_guard<std::mutex> LG(ATI->Lock);
1421c95abe94SJohannes Doerfert     if (!ATI->DeallocationTrace.empty())
1422c95abe94SJohannes Doerfert       ErrorReporter::reportDeallocationOfDeallocatedPtr(TgtPtr, Kind, ATI,
1423c95abe94SJohannes Doerfert                                                         StackTrace);
1424c95abe94SJohannes Doerfert 
1425c95abe94SJohannes Doerfert     if (ATI->Kind != Kind)
1426c95abe94SJohannes Doerfert       ErrorReporter::reportDeallocationOfWrongPtrKind(TgtPtr, Kind, ATI,
1427c95abe94SJohannes Doerfert                                                       StackTrace);
1428c95abe94SJohannes Doerfert 
1429c95abe94SJohannes Doerfert     ATI->DeallocationTrace = StackTrace;
1430c95abe94SJohannes Doerfert 
1431c95abe94SJohannes Doerfert #undef DEALLOCATION_ERROR
1432c95abe94SJohannes Doerfert   }
1433c95abe94SJohannes Doerfert 
1434330d8983SJohannes Doerfert   int Res;
1435b438a817SJhonatan Cléto   switch (Kind) {
1436b438a817SJhonatan Cléto   case TARGET_ALLOC_DEFAULT:
1437b438a817SJhonatan Cléto   case TARGET_ALLOC_DEVICE_NON_BLOCKING:
1438b438a817SJhonatan Cléto   case TARGET_ALLOC_DEVICE:
1439b438a817SJhonatan Cléto     if (MemoryManager) {
1440330d8983SJohannes Doerfert       Res = MemoryManager->free(TgtPtr);
1441330d8983SJohannes Doerfert       if (Res)
1442b438a817SJhonatan Cléto         return Plugin::error(
1443b438a817SJhonatan Cléto             "Failure to deallocate device pointer %p via memory manager",
1444b438a817SJhonatan Cléto             TgtPtr);
1445b438a817SJhonatan Cléto       break;
1446b438a817SJhonatan Cléto     }
1447b438a817SJhonatan Cléto     [[fallthrough]];
1448b438a817SJhonatan Cléto   case TARGET_ALLOC_HOST:
1449b438a817SJhonatan Cléto   case TARGET_ALLOC_SHARED:
1450b438a817SJhonatan Cléto     Res = free(TgtPtr, Kind);
1451b438a817SJhonatan Cléto     if (Res)
1452b438a817SJhonatan Cléto       return Plugin::error(
1453b438a817SJhonatan Cléto           "Failure to deallocate device pointer %p via device deallocator",
1454b438a817SJhonatan Cléto           TgtPtr);
1455b438a817SJhonatan Cléto   }
1456330d8983SJohannes Doerfert 
1457330d8983SJohannes Doerfert   // Unregister deallocated pinned memory buffer if the type is host memory.
1458330d8983SJohannes Doerfert   if (Kind == TARGET_ALLOC_HOST)
1459330d8983SJohannes Doerfert     if (auto Err = PinnedAllocs.unregisterHostBuffer(TgtPtr))
1460330d8983SJohannes Doerfert       return Err;
1461330d8983SJohannes Doerfert 
1462330d8983SJohannes Doerfert   return Plugin::success();
1463330d8983SJohannes Doerfert }
1464330d8983SJohannes Doerfert 
1465330d8983SJohannes Doerfert Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr,
1466330d8983SJohannes Doerfert                                   int64_t Size, __tgt_async_info *AsyncInfo) {
1467330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1468330d8983SJohannes Doerfert 
1469330d8983SJohannes Doerfert   auto Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
1470330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1471330d8983SJohannes Doerfert   return Err;
1472330d8983SJohannes Doerfert }
1473330d8983SJohannes Doerfert 
1474330d8983SJohannes Doerfert Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr,
1475330d8983SJohannes Doerfert                                     int64_t Size, __tgt_async_info *AsyncInfo) {
1476330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1477330d8983SJohannes Doerfert 
1478330d8983SJohannes Doerfert   auto Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
1479330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1480330d8983SJohannes Doerfert   return Err;
1481330d8983SJohannes Doerfert }
1482330d8983SJohannes Doerfert 
1483330d8983SJohannes Doerfert Error GenericDeviceTy::dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev,
1484330d8983SJohannes Doerfert                                     void *DstPtr, int64_t Size,
1485330d8983SJohannes Doerfert                                     __tgt_async_info *AsyncInfo) {
1486330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1487330d8983SJohannes Doerfert 
1488330d8983SJohannes Doerfert   auto Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
1489330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1490330d8983SJohannes Doerfert   return Err;
1491330d8983SJohannes Doerfert }
1492330d8983SJohannes Doerfert 
1493330d8983SJohannes Doerfert Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
1494330d8983SJohannes Doerfert                                     ptrdiff_t *ArgOffsets,
1495330d8983SJohannes Doerfert                                     KernelArgsTy &KernelArgs,
1496330d8983SJohannes Doerfert                                     __tgt_async_info *AsyncInfo) {
1497330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(
1498f42f57b5SJoseph Huber       *this,
1499f42f57b5SJoseph Huber       Plugin.getRecordReplay().isRecordingOrReplaying() ? nullptr : AsyncInfo);
1500330d8983SJohannes Doerfert 
1501330d8983SJohannes Doerfert   GenericKernelTy &GenericKernel =
1502330d8983SJohannes Doerfert       *reinterpret_cast<GenericKernelTy *>(EntryPtr);
1503330d8983SJohannes Doerfert 
15049a101322SJohannes Doerfert   {
15059a101322SJohannes Doerfert     std::string StackTrace;
15069a101322SJohannes Doerfert     if (OMPX_TrackNumKernelLaunches) {
15079a101322SJohannes Doerfert       llvm::raw_string_ostream OS(StackTrace);
15089a101322SJohannes Doerfert       llvm::sys::PrintStackTrace(OS);
15099a101322SJohannes Doerfert     }
15109a101322SJohannes Doerfert 
15119a101322SJohannes Doerfert     auto KernelTraceInfoRecord = KernelLaunchTraces.getExclusiveAccessor();
15129a101322SJohannes Doerfert     (*KernelTraceInfoRecord)
15139a101322SJohannes Doerfert         .emplace(&GenericKernel, std::move(StackTrace), AsyncInfo);
15149a101322SJohannes Doerfert   }
15159a101322SJohannes Doerfert 
1516330d8983SJohannes Doerfert   auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
1517330d8983SJohannes Doerfert                                   AsyncInfoWrapper);
1518330d8983SJohannes Doerfert 
1519330d8983SJohannes Doerfert   // 'finalize' here to guarantee next record-replay actions are in-sync
1520330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1521330d8983SJohannes Doerfert 
1522f42f57b5SJoseph Huber   RecordReplayTy &RecordReplay = Plugin.getRecordReplay();
1523330d8983SJohannes Doerfert   if (RecordReplay.isRecordingOrReplaying() &&
1524330d8983SJohannes Doerfert       RecordReplay.isSaveOutputEnabled())
1525330d8983SJohannes Doerfert     RecordReplay.saveKernelOutputInfo(GenericKernel.getName());
1526330d8983SJohannes Doerfert 
1527330d8983SJohannes Doerfert   return Err;
1528330d8983SJohannes Doerfert }
1529330d8983SJohannes Doerfert 
1530330d8983SJohannes Doerfert Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
1531330d8983SJohannes Doerfert   assert(AsyncInfoPtr && "Invalid async info");
1532330d8983SJohannes Doerfert 
1533330d8983SJohannes Doerfert   *AsyncInfoPtr = new __tgt_async_info();
1534330d8983SJohannes Doerfert 
1535330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(*this, *AsyncInfoPtr);
1536330d8983SJohannes Doerfert 
1537330d8983SJohannes Doerfert   auto Err = initAsyncInfoImpl(AsyncInfoWrapper);
1538330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1539330d8983SJohannes Doerfert   return Err;
1540330d8983SJohannes Doerfert }
1541330d8983SJohannes Doerfert 
1542330d8983SJohannes Doerfert Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
1543330d8983SJohannes Doerfert   assert(DeviceInfo && "Invalid device info");
1544330d8983SJohannes Doerfert 
1545330d8983SJohannes Doerfert   return initDeviceInfoImpl(DeviceInfo);
1546330d8983SJohannes Doerfert }
1547330d8983SJohannes Doerfert 
1548330d8983SJohannes Doerfert Error GenericDeviceTy::printInfo() {
1549330d8983SJohannes Doerfert   InfoQueueTy InfoQueue;
1550330d8983SJohannes Doerfert 
1551330d8983SJohannes Doerfert   // Get the vendor-specific info entries describing the device properties.
1552330d8983SJohannes Doerfert   if (auto Err = obtainInfoImpl(InfoQueue))
1553330d8983SJohannes Doerfert     return Err;
1554330d8983SJohannes Doerfert 
1555330d8983SJohannes Doerfert   // Print all info entries.
1556330d8983SJohannes Doerfert   InfoQueue.print();
1557330d8983SJohannes Doerfert 
1558330d8983SJohannes Doerfert   return Plugin::success();
1559330d8983SJohannes Doerfert }
1560330d8983SJohannes Doerfert 
1561330d8983SJohannes Doerfert Error GenericDeviceTy::createEvent(void **EventPtrStorage) {
1562330d8983SJohannes Doerfert   return createEventImpl(EventPtrStorage);
1563330d8983SJohannes Doerfert }
1564330d8983SJohannes Doerfert 
1565330d8983SJohannes Doerfert Error GenericDeviceTy::destroyEvent(void *EventPtr) {
1566330d8983SJohannes Doerfert   return destroyEventImpl(EventPtr);
1567330d8983SJohannes Doerfert }
1568330d8983SJohannes Doerfert 
1569330d8983SJohannes Doerfert Error GenericDeviceTy::recordEvent(void *EventPtr,
1570330d8983SJohannes Doerfert                                    __tgt_async_info *AsyncInfo) {
1571330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1572330d8983SJohannes Doerfert 
1573330d8983SJohannes Doerfert   auto Err = recordEventImpl(EventPtr, AsyncInfoWrapper);
1574330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1575330d8983SJohannes Doerfert   return Err;
1576330d8983SJohannes Doerfert }
1577330d8983SJohannes Doerfert 
1578330d8983SJohannes Doerfert Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
1579330d8983SJohannes Doerfert   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1580330d8983SJohannes Doerfert 
1581330d8983SJohannes Doerfert   auto Err = waitEventImpl(EventPtr, AsyncInfoWrapper);
1582330d8983SJohannes Doerfert   AsyncInfoWrapper.finalize(Err);
1583330d8983SJohannes Doerfert   return Err;
1584330d8983SJohannes Doerfert }
1585330d8983SJohannes Doerfert 
1586330d8983SJohannes Doerfert Error GenericDeviceTy::syncEvent(void *EventPtr) {
1587330d8983SJohannes Doerfert   return syncEventImpl(EventPtr);
1588330d8983SJohannes Doerfert }
1589330d8983SJohannes Doerfert 
1590330d8983SJohannes Doerfert bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }
1591330d8983SJohannes Doerfert 
1592330d8983SJohannes Doerfert Error GenericPluginTy::init() {
1593435aa766SJoseph Huber   if (Initialized)
1594435aa766SJoseph Huber     return Plugin::success();
1595435aa766SJoseph Huber 
1596330d8983SJohannes Doerfert   auto NumDevicesOrErr = initImpl();
1597330d8983SJohannes Doerfert   if (!NumDevicesOrErr)
1598330d8983SJohannes Doerfert     return NumDevicesOrErr.takeError();
159921f3a609SJoseph Huber   Initialized = true;
1600435aa766SJoseph Huber 
1601330d8983SJohannes Doerfert   NumDevices = *NumDevicesOrErr;
1602330d8983SJohannes Doerfert   if (NumDevices == 0)
1603330d8983SJohannes Doerfert     return Plugin::success();
1604330d8983SJohannes Doerfert 
1605330d8983SJohannes Doerfert   assert(Devices.size() == 0 && "Plugin already initialized");
1606330d8983SJohannes Doerfert   Devices.resize(NumDevices, nullptr);
1607330d8983SJohannes Doerfert 
1608330d8983SJohannes Doerfert   GlobalHandler = createGlobalHandler();
1609330d8983SJohannes Doerfert   assert(GlobalHandler && "Invalid global handler");
1610330d8983SJohannes Doerfert 
1611330d8983SJohannes Doerfert   RPCServer = new RPCServerTy(*this);
1612330d8983SJohannes Doerfert   assert(RPCServer && "Invalid RPC server");
1613330d8983SJohannes Doerfert 
1614f42f57b5SJoseph Huber   RecordReplay = new RecordReplayTy();
1615f42f57b5SJoseph Huber   assert(RecordReplay && "Invalid RR interface");
1616f42f57b5SJoseph Huber 
1617330d8983SJohannes Doerfert   return Plugin::success();
1618330d8983SJohannes Doerfert }
1619330d8983SJohannes Doerfert 
1620330d8983SJohannes Doerfert Error GenericPluginTy::deinit() {
1621435aa766SJoseph Huber   assert(Initialized && "Plugin was not initialized!");
1622435aa766SJoseph Huber 
1623330d8983SJohannes Doerfert   // Deinitialize all active devices.
1624330d8983SJohannes Doerfert   for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
1625330d8983SJohannes Doerfert     if (Devices[DeviceId]) {
1626330d8983SJohannes Doerfert       if (auto Err = deinitDevice(DeviceId))
1627330d8983SJohannes Doerfert         return Err;
1628330d8983SJohannes Doerfert     }
1629330d8983SJohannes Doerfert     assert(!Devices[DeviceId] && "Device was not deinitialized");
1630330d8983SJohannes Doerfert   }
1631330d8983SJohannes Doerfert 
1632330d8983SJohannes Doerfert   // There is no global handler if no device is available.
1633330d8983SJohannes Doerfert   if (GlobalHandler)
1634330d8983SJohannes Doerfert     delete GlobalHandler;
1635330d8983SJohannes Doerfert 
1636*38b3f45aSJoseph Huber   if (RPCServer && RPCServer->Thread->Running.load(std::memory_order_relaxed))
1637134401deSJoseph Huber     if (Error Err = RPCServer->shutDown())
1638134401deSJoseph Huber       return Err;
1639f0750584SJoseph Huber 
1640f0750584SJoseph Huber   if (RPCServer)
1641330d8983SJohannes Doerfert     delete RPCServer;
1642330d8983SJohannes Doerfert 
1643f42f57b5SJoseph Huber   if (RecordReplay)
1644f42f57b5SJoseph Huber     delete RecordReplay;
1645f42f57b5SJoseph Huber 
1646330d8983SJohannes Doerfert   // Perform last deinitializations on the plugin.
1647435aa766SJoseph Huber   if (Error Err = deinitImpl())
1648435aa766SJoseph Huber     return Err;
1649435aa766SJoseph Huber   Initialized = false;
1650435aa766SJoseph Huber 
1651435aa766SJoseph Huber   return Plugin::success();
1652330d8983SJohannes Doerfert }
1653330d8983SJohannes Doerfert 
1654330d8983SJohannes Doerfert Error GenericPluginTy::initDevice(int32_t DeviceId) {
1655330d8983SJohannes Doerfert   assert(!Devices[DeviceId] && "Device already initialized");
1656330d8983SJohannes Doerfert 
1657330d8983SJohannes Doerfert   // Create the device and save the reference.
1658330d8983SJohannes Doerfert   GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices);
1659330d8983SJohannes Doerfert   assert(Device && "Invalid device");
1660330d8983SJohannes Doerfert 
1661330d8983SJohannes Doerfert   // Save the device reference into the list.
1662330d8983SJohannes Doerfert   Devices[DeviceId] = Device;
1663330d8983SJohannes Doerfert 
1664330d8983SJohannes Doerfert   // Initialize the device and its resources.
1665330d8983SJohannes Doerfert   return Device->init(*this);
1666330d8983SJohannes Doerfert }
1667330d8983SJohannes Doerfert 
1668330d8983SJohannes Doerfert Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
1669330d8983SJohannes Doerfert   // The device may be already deinitialized.
1670330d8983SJohannes Doerfert   if (Devices[DeviceId] == nullptr)
1671330d8983SJohannes Doerfert     return Plugin::success();
1672330d8983SJohannes Doerfert 
1673330d8983SJohannes Doerfert   // Deinitialize the device and release its resources.
1674330d8983SJohannes Doerfert   if (auto Err = Devices[DeviceId]->deinit(*this))
1675330d8983SJohannes Doerfert     return Err;
1676330d8983SJohannes Doerfert 
1677330d8983SJohannes Doerfert   // Delete the device and invalidate its reference.
1678330d8983SJohannes Doerfert   delete Devices[DeviceId];
1679330d8983SJohannes Doerfert   Devices[DeviceId] = nullptr;
1680330d8983SJohannes Doerfert 
1681330d8983SJohannes Doerfert   return Plugin::success();
1682330d8983SJohannes Doerfert }
1683330d8983SJohannes Doerfert 
1684330d8983SJohannes Doerfert Expected<bool> GenericPluginTy::checkELFImage(StringRef Image) const {
1685330d8983SJohannes Doerfert   // First check if this image is a regular ELF file.
1686330d8983SJohannes Doerfert   if (!utils::elf::isELF(Image))
1687330d8983SJohannes Doerfert     return false;
1688330d8983SJohannes Doerfert 
1689330d8983SJohannes Doerfert   // Check if this image is an ELF with a matching machine value.
1690330d8983SJohannes Doerfert   auto MachineOrErr = utils::elf::checkMachine(Image, getMagicElfBits());
1691330d8983SJohannes Doerfert   if (!MachineOrErr)
1692330d8983SJohannes Doerfert     return MachineOrErr.takeError();
1693330d8983SJohannes Doerfert 
169421f3a609SJoseph Huber   return MachineOrErr;
1695330d8983SJohannes Doerfert }
1696330d8983SJohannes Doerfert 
169721f3a609SJoseph Huber Expected<bool> GenericPluginTy::checkBitcodeImage(StringRef Image) const {
169821f3a609SJoseph Huber   if (identify_magic(Image) != file_magic::bitcode)
169921f3a609SJoseph Huber     return false;
170021f3a609SJoseph Huber 
170121f3a609SJoseph Huber   LLVMContext Context;
170221f3a609SJoseph Huber   auto ModuleOrErr = getLazyBitcodeModule(MemoryBufferRef(Image, ""), Context,
170321f3a609SJoseph Huber                                           /*ShouldLazyLoadMetadata=*/true);
170421f3a609SJoseph Huber   if (!ModuleOrErr)
170521f3a609SJoseph Huber     return ModuleOrErr.takeError();
170621f3a609SJoseph Huber   Module &M = **ModuleOrErr;
170721f3a609SJoseph Huber 
170821f3a609SJoseph Huber   return Triple(M.getTargetTriple()).getArch() == getTripleArch();
170921f3a609SJoseph Huber }
171021f3a609SJoseph Huber 
171121f3a609SJoseph Huber int32_t GenericPluginTy::is_initialized() const { return Initialized; }
171221f3a609SJoseph Huber 
1713435aa766SJoseph Huber int32_t GenericPluginTy::is_plugin_compatible(__tgt_device_image *Image) {
1714330d8983SJohannes Doerfert   StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
171508533a3eSJohannes Doerfert                    utils::getPtrDiff(Image->ImageEnd, Image->ImageStart));
1716330d8983SJohannes Doerfert 
1717330d8983SJohannes Doerfert   auto HandleError = [&](Error Err) -> bool {
1718330d8983SJohannes Doerfert     [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
1719330d8983SJohannes Doerfert     DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str());
1720330d8983SJohannes Doerfert     return false;
1721330d8983SJohannes Doerfert   };
1722330d8983SJohannes Doerfert   switch (identify_magic(Buffer)) {
1723330d8983SJohannes Doerfert   case file_magic::elf:
1724330d8983SJohannes Doerfert   case file_magic::elf_relocatable:
1725330d8983SJohannes Doerfert   case file_magic::elf_executable:
1726330d8983SJohannes Doerfert   case file_magic::elf_shared_object:
1727330d8983SJohannes Doerfert   case file_magic::elf_core: {
1728330d8983SJohannes Doerfert     auto MatchOrErr = checkELFImage(Buffer);
1729330d8983SJohannes Doerfert     if (Error Err = MatchOrErr.takeError())
1730330d8983SJohannes Doerfert       return HandleError(std::move(Err));
1731330d8983SJohannes Doerfert     return *MatchOrErr;
1732435aa766SJoseph Huber   }
1733435aa766SJoseph Huber   case file_magic::bitcode: {
1734435aa766SJoseph Huber     auto MatchOrErr = checkBitcodeImage(Buffer);
1735435aa766SJoseph Huber     if (Error Err = MatchOrErr.takeError())
1736435aa766SJoseph Huber       return HandleError(std::move(Err));
1737435aa766SJoseph Huber     return *MatchOrErr;
1738435aa766SJoseph Huber   }
1739435aa766SJoseph Huber   default:
1740435aa766SJoseph Huber     return false;
1741435aa766SJoseph Huber   }
1742435aa766SJoseph Huber }
1743435aa766SJoseph Huber 
1744435aa766SJoseph Huber int32_t GenericPluginTy::is_device_compatible(int32_t DeviceId,
1745435aa766SJoseph Huber                                               __tgt_device_image *Image) {
1746435aa766SJoseph Huber   StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
174708533a3eSJohannes Doerfert                    utils::getPtrDiff(Image->ImageEnd, Image->ImageStart));
1748435aa766SJoseph Huber 
1749435aa766SJoseph Huber   auto HandleError = [&](Error Err) -> bool {
1750435aa766SJoseph Huber     [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
1751435aa766SJoseph Huber     DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str());
1752435aa766SJoseph Huber     return false;
1753435aa766SJoseph Huber   };
1754435aa766SJoseph Huber   switch (identify_magic(Buffer)) {
1755435aa766SJoseph Huber   case file_magic::elf:
1756435aa766SJoseph Huber   case file_magic::elf_relocatable:
1757435aa766SJoseph Huber   case file_magic::elf_executable:
1758435aa766SJoseph Huber   case file_magic::elf_shared_object:
1759435aa766SJoseph Huber   case file_magic::elf_core: {
1760435aa766SJoseph Huber     auto MatchOrErr = checkELFImage(Buffer);
1761435aa766SJoseph Huber     if (Error Err = MatchOrErr.takeError())
1762435aa766SJoseph Huber       return HandleError(std::move(Err));
1763435aa766SJoseph Huber     if (!*MatchOrErr)
1764435aa766SJoseph Huber       return false;
176521f3a609SJoseph Huber 
176621f3a609SJoseph Huber     // Perform plugin-dependent checks for the specific architecture if needed.
1767435aa766SJoseph Huber     auto CompatibleOrErr = isELFCompatible(DeviceId, Buffer);
176821f3a609SJoseph Huber     if (Error Err = CompatibleOrErr.takeError())
176921f3a609SJoseph Huber       return HandleError(std::move(Err));
177021f3a609SJoseph Huber     return *CompatibleOrErr;
1771330d8983SJohannes Doerfert   }
1772330d8983SJohannes Doerfert   case file_magic::bitcode: {
177321f3a609SJoseph Huber     auto MatchOrErr = checkBitcodeImage(Buffer);
1774330d8983SJohannes Doerfert     if (Error Err = MatchOrErr.takeError())
1775330d8983SJohannes Doerfert       return HandleError(std::move(Err));
1776330d8983SJohannes Doerfert     return *MatchOrErr;
1777330d8983SJohannes Doerfert   }
1778330d8983SJohannes Doerfert   default:
1779330d8983SJohannes Doerfert     return false;
1780330d8983SJohannes Doerfert   }
1781330d8983SJohannes Doerfert }
1782330d8983SJohannes Doerfert 
1783435aa766SJoseph Huber int32_t GenericPluginTy::is_device_initialized(int32_t DeviceId) const {
1784435aa766SJoseph Huber   return isValidDeviceId(DeviceId) && Devices[DeviceId] != nullptr;
1785435aa766SJoseph Huber }
1786435aa766SJoseph Huber 
1787330d8983SJohannes Doerfert int32_t GenericPluginTy::init_device(int32_t DeviceId) {
1788330d8983SJohannes Doerfert   auto Err = initDevice(DeviceId);
1789330d8983SJohannes Doerfert   if (Err) {
1790330d8983SJohannes Doerfert     REPORT("Failure to initialize device %d: %s\n", DeviceId,
1791330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1792330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1793330d8983SJohannes Doerfert   }
1794330d8983SJohannes Doerfert 
1795330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1796330d8983SJohannes Doerfert }
1797330d8983SJohannes Doerfert 
1798330d8983SJohannes Doerfert int32_t GenericPluginTy::number_of_devices() { return getNumDevices(); }
1799330d8983SJohannes Doerfert 
1800330d8983SJohannes Doerfert int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId,
1801330d8983SJohannes Doerfert                                              int32_t DstDeviceId) {
1802330d8983SJohannes Doerfert   return isDataExchangable(SrcDeviceId, DstDeviceId);
1803330d8983SJohannes Doerfert }
1804330d8983SJohannes Doerfert 
1805330d8983SJohannes Doerfert int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId,
1806330d8983SJohannes Doerfert                                                   int64_t MemorySize,
1807330d8983SJohannes Doerfert                                                   void *VAddr, bool isRecord,
1808330d8983SJohannes Doerfert                                                   bool SaveOutput,
1809330d8983SJohannes Doerfert                                                   uint64_t &ReqPtrArgOffset) {
1810330d8983SJohannes Doerfert   GenericDeviceTy &Device = getDevice(DeviceId);
1811330d8983SJohannes Doerfert   RecordReplayTy::RRStatusTy Status =
1812330d8983SJohannes Doerfert       isRecord ? RecordReplayTy::RRStatusTy::RRRecording
1813330d8983SJohannes Doerfert                : RecordReplayTy::RRStatusTy::RRReplaying;
1814330d8983SJohannes Doerfert 
1815f42f57b5SJoseph Huber   if (auto Err = RecordReplay->init(&Device, MemorySize, VAddr, Status,
1816330d8983SJohannes Doerfert                                     SaveOutput, ReqPtrArgOffset)) {
1817330d8983SJohannes Doerfert     REPORT("WARNING RR did not intialize RR-properly with %lu bytes"
1818330d8983SJohannes Doerfert            "(Error: %s)\n",
1819330d8983SJohannes Doerfert            MemorySize, toString(std::move(Err)).data());
1820f42f57b5SJoseph Huber     RecordReplay->setStatus(RecordReplayTy::RRStatusTy::RRDeactivated);
1821330d8983SJohannes Doerfert 
1822330d8983SJohannes Doerfert     if (!isRecord) {
1823330d8983SJohannes Doerfert       return OFFLOAD_FAIL;
1824330d8983SJohannes Doerfert     }
1825330d8983SJohannes Doerfert   }
1826330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1827330d8983SJohannes Doerfert }
1828330d8983SJohannes Doerfert 
1829330d8983SJohannes Doerfert int32_t GenericPluginTy::load_binary(int32_t DeviceId,
1830330d8983SJohannes Doerfert                                      __tgt_device_image *TgtImage,
1831330d8983SJohannes Doerfert                                      __tgt_device_binary *Binary) {
1832330d8983SJohannes Doerfert   GenericDeviceTy &Device = getDevice(DeviceId);
1833330d8983SJohannes Doerfert 
1834330d8983SJohannes Doerfert   auto ImageOrErr = Device.loadBinary(*this, TgtImage);
1835330d8983SJohannes Doerfert   if (!ImageOrErr) {
1836330d8983SJohannes Doerfert     auto Err = ImageOrErr.takeError();
1837330d8983SJohannes Doerfert     REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
1838330d8983SJohannes Doerfert            DeviceId, toString(std::move(Err)).data());
1839330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1840330d8983SJohannes Doerfert   }
1841330d8983SJohannes Doerfert 
1842330d8983SJohannes Doerfert   DeviceImageTy *Image = *ImageOrErr;
1843330d8983SJohannes Doerfert   assert(Image != nullptr && "Invalid Image");
1844330d8983SJohannes Doerfert 
1845330d8983SJohannes Doerfert   *Binary = __tgt_device_binary{reinterpret_cast<uint64_t>(Image)};
1846330d8983SJohannes Doerfert 
1847330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1848330d8983SJohannes Doerfert }
1849330d8983SJohannes Doerfert 
1850330d8983SJohannes Doerfert void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
1851330d8983SJohannes Doerfert                                   int32_t Kind) {
1852330d8983SJohannes Doerfert   auto AllocOrErr =
1853330d8983SJohannes Doerfert       getDevice(DeviceId).dataAlloc(Size, HostPtr, (TargetAllocTy)Kind);
1854330d8983SJohannes Doerfert   if (!AllocOrErr) {
1855330d8983SJohannes Doerfert     auto Err = AllocOrErr.takeError();
1856330d8983SJohannes Doerfert     REPORT("Failure to allocate device memory: %s\n",
1857330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1858330d8983SJohannes Doerfert     return nullptr;
1859330d8983SJohannes Doerfert   }
1860330d8983SJohannes Doerfert   assert(*AllocOrErr && "Null pointer upon successful allocation");
1861330d8983SJohannes Doerfert 
1862330d8983SJohannes Doerfert   return *AllocOrErr;
1863330d8983SJohannes Doerfert }
1864330d8983SJohannes Doerfert 
1865330d8983SJohannes Doerfert int32_t GenericPluginTy::data_delete(int32_t DeviceId, void *TgtPtr,
1866330d8983SJohannes Doerfert                                      int32_t Kind) {
1867330d8983SJohannes Doerfert   auto Err =
1868330d8983SJohannes Doerfert       getDevice(DeviceId).dataDelete(TgtPtr, static_cast<TargetAllocTy>(Kind));
1869330d8983SJohannes Doerfert   if (Err) {
1870330d8983SJohannes Doerfert     REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr,
1871330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1872330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1873330d8983SJohannes Doerfert   }
1874330d8983SJohannes Doerfert 
1875330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1876330d8983SJohannes Doerfert }
1877330d8983SJohannes Doerfert 
1878330d8983SJohannes Doerfert int32_t GenericPluginTy::data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
1879330d8983SJohannes Doerfert                                    void **LockedPtr) {
1880330d8983SJohannes Doerfert   auto LockedPtrOrErr = getDevice(DeviceId).dataLock(Ptr, Size);
1881330d8983SJohannes Doerfert   if (!LockedPtrOrErr) {
1882330d8983SJohannes Doerfert     auto Err = LockedPtrOrErr.takeError();
1883330d8983SJohannes Doerfert     REPORT("Failure to lock memory %p: %s\n", Ptr,
1884330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1885330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1886330d8983SJohannes Doerfert   }
1887330d8983SJohannes Doerfert 
1888330d8983SJohannes Doerfert   if (!(*LockedPtrOrErr)) {
1889330d8983SJohannes Doerfert     REPORT("Failure to lock memory %p: obtained a null locked pointer\n", Ptr);
1890330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1891330d8983SJohannes Doerfert   }
1892330d8983SJohannes Doerfert   *LockedPtr = *LockedPtrOrErr;
1893330d8983SJohannes Doerfert 
1894330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1895330d8983SJohannes Doerfert }
1896330d8983SJohannes Doerfert 
1897330d8983SJohannes Doerfert int32_t GenericPluginTy::data_unlock(int32_t DeviceId, void *Ptr) {
1898330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).dataUnlock(Ptr);
1899330d8983SJohannes Doerfert   if (Err) {
1900330d8983SJohannes Doerfert     REPORT("Failure to unlock memory %p: %s\n", Ptr,
1901330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1902330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1903330d8983SJohannes Doerfert   }
1904330d8983SJohannes Doerfert 
1905330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1906330d8983SJohannes Doerfert }
1907330d8983SJohannes Doerfert 
1908330d8983SJohannes Doerfert int32_t GenericPluginTy::data_notify_mapped(int32_t DeviceId, void *HstPtr,
1909330d8983SJohannes Doerfert                                             int64_t Size) {
1910330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).notifyDataMapped(HstPtr, Size);
1911330d8983SJohannes Doerfert   if (Err) {
1912330d8983SJohannes Doerfert     REPORT("Failure to notify data mapped %p: %s\n", HstPtr,
1913330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1914330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1915330d8983SJohannes Doerfert   }
1916330d8983SJohannes Doerfert 
1917330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1918330d8983SJohannes Doerfert }
1919330d8983SJohannes Doerfert 
1920330d8983SJohannes Doerfert int32_t GenericPluginTy::data_notify_unmapped(int32_t DeviceId, void *HstPtr) {
1921330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).notifyDataUnmapped(HstPtr);
1922330d8983SJohannes Doerfert   if (Err) {
1923330d8983SJohannes Doerfert     REPORT("Failure to notify data unmapped %p: %s\n", HstPtr,
1924330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1925330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1926330d8983SJohannes Doerfert   }
1927330d8983SJohannes Doerfert 
1928330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1929330d8983SJohannes Doerfert }
1930330d8983SJohannes Doerfert 
1931330d8983SJohannes Doerfert int32_t GenericPluginTy::data_submit(int32_t DeviceId, void *TgtPtr,
1932330d8983SJohannes Doerfert                                      void *HstPtr, int64_t Size) {
1933330d8983SJohannes Doerfert   return data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
1934330d8983SJohannes Doerfert                            /*AsyncInfoPtr=*/nullptr);
1935330d8983SJohannes Doerfert }
1936330d8983SJohannes Doerfert 
1937330d8983SJohannes Doerfert int32_t GenericPluginTy::data_submit_async(int32_t DeviceId, void *TgtPtr,
1938330d8983SJohannes Doerfert                                            void *HstPtr, int64_t Size,
1939330d8983SJohannes Doerfert                                            __tgt_async_info *AsyncInfoPtr) {
1940330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size, AsyncInfoPtr);
1941330d8983SJohannes Doerfert   if (Err) {
1942330d8983SJohannes Doerfert     REPORT("Failure to copy data from host to device. Pointers: host "
1943330d8983SJohannes Doerfert            "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
1944330d8983SJohannes Doerfert            DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
1945330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1946330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1947330d8983SJohannes Doerfert   }
1948330d8983SJohannes Doerfert 
1949330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1950330d8983SJohannes Doerfert }
1951330d8983SJohannes Doerfert 
1952330d8983SJohannes Doerfert int32_t GenericPluginTy::data_retrieve(int32_t DeviceId, void *HstPtr,
1953330d8983SJohannes Doerfert                                        void *TgtPtr, int64_t Size) {
1954330d8983SJohannes Doerfert   return data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
1955330d8983SJohannes Doerfert                              /*AsyncInfoPtr=*/nullptr);
1956330d8983SJohannes Doerfert }
1957330d8983SJohannes Doerfert 
1958330d8983SJohannes Doerfert int32_t GenericPluginTy::data_retrieve_async(int32_t DeviceId, void *HstPtr,
1959330d8983SJohannes Doerfert                                              void *TgtPtr, int64_t Size,
1960330d8983SJohannes Doerfert                                              __tgt_async_info *AsyncInfoPtr) {
1961330d8983SJohannes Doerfert   auto Err =
1962330d8983SJohannes Doerfert       getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, Size, AsyncInfoPtr);
1963330d8983SJohannes Doerfert   if (Err) {
1964330d8983SJohannes Doerfert     REPORT("Faliure to copy data from device to host. Pointers: host "
1965330d8983SJohannes Doerfert            "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
1966330d8983SJohannes Doerfert            DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
1967330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1968330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1969330d8983SJohannes Doerfert   }
1970330d8983SJohannes Doerfert 
1971330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1972330d8983SJohannes Doerfert }
1973330d8983SJohannes Doerfert 
1974330d8983SJohannes Doerfert int32_t GenericPluginTy::data_exchange(int32_t SrcDeviceId, void *SrcPtr,
1975330d8983SJohannes Doerfert                                        int32_t DstDeviceId, void *DstPtr,
1976330d8983SJohannes Doerfert                                        int64_t Size) {
1977330d8983SJohannes Doerfert   return data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, Size,
1978330d8983SJohannes Doerfert                              /*AsyncInfoPtr=*/nullptr);
1979330d8983SJohannes Doerfert }
1980330d8983SJohannes Doerfert 
1981330d8983SJohannes Doerfert int32_t GenericPluginTy::data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
1982330d8983SJohannes Doerfert                                              int DstDeviceId, void *DstPtr,
1983330d8983SJohannes Doerfert                                              int64_t Size,
1984330d8983SJohannes Doerfert                                              __tgt_async_info *AsyncInfo) {
1985330d8983SJohannes Doerfert   GenericDeviceTy &SrcDevice = getDevice(SrcDeviceId);
1986330d8983SJohannes Doerfert   GenericDeviceTy &DstDevice = getDevice(DstDeviceId);
1987330d8983SJohannes Doerfert   auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo);
1988330d8983SJohannes Doerfert   if (Err) {
1989330d8983SJohannes Doerfert     REPORT("Failure to copy data from device (%d) to device (%d). Pointers: "
1990330d8983SJohannes Doerfert            "host = " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
1991330d8983SJohannes Doerfert            SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size,
1992330d8983SJohannes Doerfert            toString(std::move(Err)).data());
1993330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
1994330d8983SJohannes Doerfert   }
1995330d8983SJohannes Doerfert 
1996330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
1997330d8983SJohannes Doerfert }
1998330d8983SJohannes Doerfert 
1999330d8983SJohannes Doerfert int32_t GenericPluginTy::launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
2000330d8983SJohannes Doerfert                                        void **TgtArgs, ptrdiff_t *TgtOffsets,
2001330d8983SJohannes Doerfert                                        KernelArgsTy *KernelArgs,
2002330d8983SJohannes Doerfert                                        __tgt_async_info *AsyncInfoPtr) {
2003330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).launchKernel(TgtEntryPtr, TgtArgs, TgtOffsets,
2004330d8983SJohannes Doerfert                                               *KernelArgs, AsyncInfoPtr);
2005330d8983SJohannes Doerfert   if (Err) {
2006330d8983SJohannes Doerfert     REPORT("Failure to run target region " DPxMOD " in device %d: %s\n",
2007330d8983SJohannes Doerfert            DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data());
2008330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2009330d8983SJohannes Doerfert   }
2010330d8983SJohannes Doerfert 
2011330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2012330d8983SJohannes Doerfert }
2013330d8983SJohannes Doerfert 
2014330d8983SJohannes Doerfert int32_t GenericPluginTy::synchronize(int32_t DeviceId,
2015330d8983SJohannes Doerfert                                      __tgt_async_info *AsyncInfoPtr) {
2016330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).synchronize(AsyncInfoPtr);
2017330d8983SJohannes Doerfert   if (Err) {
2018330d8983SJohannes Doerfert     REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue,
2019330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2020330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2021330d8983SJohannes Doerfert   }
2022330d8983SJohannes Doerfert 
2023330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2024330d8983SJohannes Doerfert }
2025330d8983SJohannes Doerfert 
2026330d8983SJohannes Doerfert int32_t GenericPluginTy::query_async(int32_t DeviceId,
2027330d8983SJohannes Doerfert                                      __tgt_async_info *AsyncInfoPtr) {
2028330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).queryAsync(AsyncInfoPtr);
2029330d8983SJohannes Doerfert   if (Err) {
2030330d8983SJohannes Doerfert     REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue,
2031330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2032330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2033330d8983SJohannes Doerfert   }
2034330d8983SJohannes Doerfert 
2035330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2036330d8983SJohannes Doerfert }
2037330d8983SJohannes Doerfert 
2038330d8983SJohannes Doerfert void GenericPluginTy::print_device_info(int32_t DeviceId) {
2039330d8983SJohannes Doerfert   if (auto Err = getDevice(DeviceId).printInfo())
2040330d8983SJohannes Doerfert     REPORT("Failure to print device %d info: %s\n", DeviceId,
2041330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2042330d8983SJohannes Doerfert }
2043330d8983SJohannes Doerfert 
2044330d8983SJohannes Doerfert int32_t GenericPluginTy::create_event(int32_t DeviceId, void **EventPtr) {
2045330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).createEvent(EventPtr);
2046330d8983SJohannes Doerfert   if (Err) {
2047330d8983SJohannes Doerfert     REPORT("Failure to create event: %s\n", toString(std::move(Err)).data());
2048330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2049330d8983SJohannes Doerfert   }
2050330d8983SJohannes Doerfert 
2051330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2052330d8983SJohannes Doerfert }
2053330d8983SJohannes Doerfert 
2054330d8983SJohannes Doerfert int32_t GenericPluginTy::record_event(int32_t DeviceId, void *EventPtr,
2055330d8983SJohannes Doerfert                                       __tgt_async_info *AsyncInfoPtr) {
2056330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr);
2057330d8983SJohannes Doerfert   if (Err) {
2058330d8983SJohannes Doerfert     REPORT("Failure to record event %p: %s\n", EventPtr,
2059330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2060330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2061330d8983SJohannes Doerfert   }
2062330d8983SJohannes Doerfert 
2063330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2064330d8983SJohannes Doerfert }
2065330d8983SJohannes Doerfert 
2066330d8983SJohannes Doerfert int32_t GenericPluginTy::wait_event(int32_t DeviceId, void *EventPtr,
2067330d8983SJohannes Doerfert                                     __tgt_async_info *AsyncInfoPtr) {
2068330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr);
2069330d8983SJohannes Doerfert   if (Err) {
2070330d8983SJohannes Doerfert     REPORT("Failure to wait event %p: %s\n", EventPtr,
2071330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2072330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2073330d8983SJohannes Doerfert   }
2074330d8983SJohannes Doerfert 
2075330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2076330d8983SJohannes Doerfert }
2077330d8983SJohannes Doerfert 
2078330d8983SJohannes Doerfert int32_t GenericPluginTy::sync_event(int32_t DeviceId, void *EventPtr) {
2079330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).syncEvent(EventPtr);
2080330d8983SJohannes Doerfert   if (Err) {
2081330d8983SJohannes Doerfert     REPORT("Failure to synchronize event %p: %s\n", EventPtr,
2082330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2083330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2084330d8983SJohannes Doerfert   }
2085330d8983SJohannes Doerfert 
2086330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2087330d8983SJohannes Doerfert }
2088330d8983SJohannes Doerfert 
2089330d8983SJohannes Doerfert int32_t GenericPluginTy::destroy_event(int32_t DeviceId, void *EventPtr) {
2090330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).destroyEvent(EventPtr);
2091330d8983SJohannes Doerfert   if (Err) {
2092330d8983SJohannes Doerfert     REPORT("Failure to destroy event %p: %s\n", EventPtr,
2093330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2094330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2095330d8983SJohannes Doerfert   }
2096330d8983SJohannes Doerfert 
2097330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2098330d8983SJohannes Doerfert }
2099330d8983SJohannes Doerfert 
2100330d8983SJohannes Doerfert void GenericPluginTy::set_info_flag(uint32_t NewInfoLevel) {
2101330d8983SJohannes Doerfert   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
2102330d8983SJohannes Doerfert   InfoLevel.store(NewInfoLevel);
2103330d8983SJohannes Doerfert }
2104330d8983SJohannes Doerfert 
2105330d8983SJohannes Doerfert int32_t GenericPluginTy::init_async_info(int32_t DeviceId,
2106330d8983SJohannes Doerfert                                          __tgt_async_info **AsyncInfoPtr) {
2107330d8983SJohannes Doerfert   assert(AsyncInfoPtr && "Invalid async info");
2108330d8983SJohannes Doerfert 
2109330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr);
2110330d8983SJohannes Doerfert   if (Err) {
2111330d8983SJohannes Doerfert     REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n",
2112330d8983SJohannes Doerfert            DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data());
2113330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2114330d8983SJohannes Doerfert   }
2115330d8983SJohannes Doerfert 
2116330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2117330d8983SJohannes Doerfert }
2118330d8983SJohannes Doerfert 
2119330d8983SJohannes Doerfert int32_t GenericPluginTy::init_device_info(int32_t DeviceId,
2120330d8983SJohannes Doerfert                                           __tgt_device_info *DeviceInfo,
2121330d8983SJohannes Doerfert                                           const char **ErrStr) {
2122330d8983SJohannes Doerfert   *ErrStr = "";
2123330d8983SJohannes Doerfert 
2124330d8983SJohannes Doerfert   auto Err = getDevice(DeviceId).initDeviceInfo(DeviceInfo);
2125330d8983SJohannes Doerfert   if (Err) {
2126330d8983SJohannes Doerfert     REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n",
2127330d8983SJohannes Doerfert            DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data());
2128330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2129330d8983SJohannes Doerfert   }
2130330d8983SJohannes Doerfert 
2131330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2132330d8983SJohannes Doerfert }
2133330d8983SJohannes Doerfert 
2134435aa766SJoseph Huber int32_t GenericPluginTy::set_device_identifier(int32_t UserId,
2135435aa766SJoseph Huber                                                int32_t DeviceId) {
2136435aa766SJoseph Huber   UserDeviceIds[DeviceId] = UserId;
2137330d8983SJohannes Doerfert 
2138330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2139330d8983SJohannes Doerfert }
2140330d8983SJohannes Doerfert 
2141330d8983SJohannes Doerfert int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) {
2142330d8983SJohannes Doerfert   return getDevice(DeviceId).useAutoZeroCopy();
2143330d8983SJohannes Doerfert }
2144330d8983SJohannes Doerfert 
2145330d8983SJohannes Doerfert int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size,
2146330d8983SJohannes Doerfert                                     const char *Name, void **DevicePtr) {
2147330d8983SJohannes Doerfert   assert(Binary.handle && "Invalid device binary handle");
2148330d8983SJohannes Doerfert   DeviceImageTy &Image = *reinterpret_cast<DeviceImageTy *>(Binary.handle);
2149330d8983SJohannes Doerfert 
2150330d8983SJohannes Doerfert   GenericDeviceTy &Device = Image.getDevice();
2151330d8983SJohannes Doerfert 
2152330d8983SJohannes Doerfert   GlobalTy DeviceGlobal(Name, Size);
2153330d8983SJohannes Doerfert   GenericGlobalHandlerTy &GHandler = getGlobalHandler();
2154330d8983SJohannes Doerfert   if (auto Err =
2155330d8983SJohannes Doerfert           GHandler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) {
2156330d8983SJohannes Doerfert     REPORT("Failure to look up global address: %s\n",
2157330d8983SJohannes Doerfert            toString(std::move(Err)).data());
2158330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2159330d8983SJohannes Doerfert   }
2160330d8983SJohannes Doerfert 
2161330d8983SJohannes Doerfert   *DevicePtr = DeviceGlobal.getPtr();
2162330d8983SJohannes Doerfert   assert(DevicePtr && "Invalid device global's address");
2163330d8983SJohannes Doerfert 
2164330d8983SJohannes Doerfert   // Save the loaded globals if we are recording.
2165f42f57b5SJoseph Huber   RecordReplayTy &RecordReplay = Device.Plugin.getRecordReplay();
2166330d8983SJohannes Doerfert   if (RecordReplay.isRecording())
2167330d8983SJohannes Doerfert     RecordReplay.addEntry(Name, Size, *DevicePtr);
2168330d8983SJohannes Doerfert 
2169330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2170330d8983SJohannes Doerfert }
2171330d8983SJohannes Doerfert 
2172330d8983SJohannes Doerfert int32_t GenericPluginTy::get_function(__tgt_device_binary Binary,
2173330d8983SJohannes Doerfert                                       const char *Name, void **KernelPtr) {
2174330d8983SJohannes Doerfert   assert(Binary.handle && "Invalid device binary handle");
2175330d8983SJohannes Doerfert   DeviceImageTy &Image = *reinterpret_cast<DeviceImageTy *>(Binary.handle);
2176330d8983SJohannes Doerfert 
2177330d8983SJohannes Doerfert   GenericDeviceTy &Device = Image.getDevice();
2178330d8983SJohannes Doerfert 
2179330d8983SJohannes Doerfert   auto KernelOrErr = Device.constructKernel(Name);
2180330d8983SJohannes Doerfert   if (Error Err = KernelOrErr.takeError()) {
2181330d8983SJohannes Doerfert     REPORT("Failure to look up kernel: %s\n", toString(std::move(Err)).data());
2182330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2183330d8983SJohannes Doerfert   }
2184330d8983SJohannes Doerfert 
2185330d8983SJohannes Doerfert   GenericKernelTy &Kernel = *KernelOrErr;
2186330d8983SJohannes Doerfert   if (auto Err = Kernel.init(Device, Image)) {
2187330d8983SJohannes Doerfert     REPORT("Failure to init kernel: %s\n", toString(std::move(Err)).data());
2188330d8983SJohannes Doerfert     return OFFLOAD_FAIL;
2189330d8983SJohannes Doerfert   }
2190330d8983SJohannes Doerfert 
2191330d8983SJohannes Doerfert   // Note that this is not the kernel's device address.
2192330d8983SJohannes Doerfert   *KernelPtr = &Kernel;
2193330d8983SJohannes Doerfert   return OFFLOAD_SUCCESS;
2194330d8983SJohannes Doerfert }
2195