xref: /llvm-project/offload/src/device.cpp (revision 3abd3d6e597cba5161f37fa0478382fc93a8c9fd)
1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing devices that are handled by RTL plugins.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "device.h"
14 #include "OffloadEntry.h"
15 #include "OpenMP/Mapping.h"
16 #include "OpenMP/OMPT/Callback.h"
17 #include "OpenMP/OMPT/Interface.h"
18 #include "PluginManager.h"
19 #include "Shared/APITypes.h"
20 #include "Shared/Debug.h"
21 #include "omptarget.h"
22 #include "private.h"
23 #include "rtl.h"
24 
25 #include "Shared/EnvironmentVar.h"
26 #include "llvm/Support/Error.h"
27 
28 #include <cassert>
29 #include <climits>
30 #include <cstdint>
31 #include <cstdio>
32 #include <mutex>
33 #include <string>
34 #include <thread>
35 
36 #ifdef OMPT_SUPPORT
37 using namespace llvm::omp::target::ompt;
38 #endif
39 
addEventIfNecessary(DeviceTy & Device,AsyncInfoTy & AsyncInfo) const40 int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
41                                             AsyncInfoTy &AsyncInfo) const {
42   // First, check if the user disabled atomic map transfer/malloc/dealloc.
43   if (!MappingConfig::get().UseEventsForAtomicTransfers)
44     return OFFLOAD_SUCCESS;
45 
46   void *Event = getEvent();
47   bool NeedNewEvent = Event == nullptr;
48   if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
49     REPORT("Failed to create event\n");
50     return OFFLOAD_FAIL;
51   }
52 
53   // We cannot assume the event should not be nullptr because we don't
54   // know if the target support event. But if a target doesn't,
55   // recordEvent should always return success.
56   if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
57     REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
58     return OFFLOAD_FAIL;
59   }
60 
61   if (NeedNewEvent)
62     setEvent(Event);
63 
64   return OFFLOAD_SUCCESS;
65 }
66 
DeviceTy(GenericPluginTy * RTL,int32_t DeviceID,int32_t RTLDeviceID)67 DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
68     : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
69       MappingInfo(*this) {}
70 
~DeviceTy()71 DeviceTy::~DeviceTy() {
72   if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
73     return;
74 
75   ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
76   dumpTargetPointerMappings(&Loc, *this);
77 }
78 
init()79 llvm::Error DeviceTy::init() {
80   int32_t Ret = RTL->init_device(RTLDeviceID);
81   if (Ret != OFFLOAD_SUCCESS)
82     return llvm::createStringError(llvm::inconvertibleErrorCode(),
83                                    "Failed to initialize device %d\n",
84                                    DeviceID);
85 
86   // Enables recording kernels if set.
87   BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD", false);
88   if (OMPX_RecordKernel) {
89     // Enables saving the device memory kernel output post execution if set.
90     BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT", false);
91 
92     uint64_t ReqPtrArgOffset;
93     RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true,
94                                   OMPX_ReplaySaveOutput, ReqPtrArgOffset);
95   }
96 
97   return llvm::Error::success();
98 }
99 
100 // Load binary to device.
101 llvm::Expected<__tgt_device_binary>
loadBinary(__tgt_device_image * Img)102 DeviceTy::loadBinary(__tgt_device_image *Img) {
103   __tgt_device_binary Binary;
104 
105   if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS)
106     return llvm::createStringError(llvm::inconvertibleErrorCode(),
107                                    "Failed to load binary %p", Img);
108   return Binary;
109 }
110 
allocData(int64_t Size,void * HstPtr,int32_t Kind)111 void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
112   /// RAII to establish tool anchors before and after data allocation
113   void *TargetPtr = nullptr;
114   OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
115                     RegionInterface.getCallbacks<ompt_target_data_alloc>(),
116                     DeviceID, HstPtr, &TargetPtr, Size,
117                     /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
118 
119   TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
120   return TargetPtr;
121 }
122 
deleteData(void * TgtAllocBegin,int32_t Kind)123 int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
124   /// RAII to establish tool anchors before and after data deletion
125   OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII(
126                     RegionInterface.getCallbacks<ompt_target_data_delete>(),
127                     DeviceID, TgtAllocBegin,
128                     /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
129 
130   return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
131 }
132 
133 // Submit data to device
submitData(void * TgtPtrBegin,void * HstPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo,HostDataToTargetTy * Entry,MappingInfoTy::HDTTMapAccessorTy * HDTTMapPtr)134 int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
135                              AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry,
136                              MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) {
137   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER)
138     MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/true,
139                               Entry, HDTTMapPtr);
140 
141   /// RAII to establish tool anchors before and after data submit
142   OMPT_IF_BUILT(
143       InterfaceRAII TargetDataSubmitRAII(
144           RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
145           omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
146           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
147 
148   return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
149                                 AsyncInfo);
150 }
151 
152 // Retrieve data from device
retrieveData(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo,HostDataToTargetTy * Entry,MappingInfoTy::HDTTMapAccessorTy * HDTTMapPtr)153 int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
154                                int64_t Size, AsyncInfoTy &AsyncInfo,
155                                HostDataToTargetTy *Entry,
156                                MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) {
157   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER)
158     MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/false,
159                               Entry, HDTTMapPtr);
160 
161   /// RAII to establish tool anchors before and after data retrieval
162   OMPT_IF_BUILT(
163       InterfaceRAII TargetDataRetrieveRAII(
164           RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
165           DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
166           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
167 
168   return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
169                                   AsyncInfo);
170 }
171 
172 // Copy data from current device to destination device directly
dataExchange(void * SrcPtr,DeviceTy & DstDev,void * DstPtr,int64_t Size,AsyncInfoTy & AsyncInfo)173 int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
174                                int64_t Size, AsyncInfoTy &AsyncInfo) {
175   /// RAII to establish tool anchors before and after data exchange
176   /// Note: Despite the fact that this is a data exchange, we use 'from_device'
177   ///       operation enum (w.r.t. ompt_target_data_op_t) as there is currently
178   ///       no better alternative. It is still possible to distinguish this
179   ///       scenario from a real data retrieve by checking if both involved
180   ///       device numbers are less than omp_get_num_devices().
181   OMPT_IF_BUILT(
182       InterfaceRAII TargetDataExchangeRAII(
183           RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
184           RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
185           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
186   if (!AsyncInfo) {
187     return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
188                               Size);
189   }
190   return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
191                                   DstPtr, Size, AsyncInfo);
192 }
193 
notifyDataMapped(void * HstPtr,int64_t Size)194 int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
195   DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n",
196      DPxPTR(HstPtr), Size);
197 
198   if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) {
199     REPORT("Notifiying about data mapping failed.\n");
200     return OFFLOAD_FAIL;
201   }
202   return OFFLOAD_SUCCESS;
203 }
204 
notifyDataUnmapped(void * HstPtr)205 int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
206   DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr));
207 
208   if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) {
209     REPORT("Notifiying about data unmapping failed.\n");
210     return OFFLOAD_FAIL;
211   }
212   return OFFLOAD_SUCCESS;
213 }
214 
215 // Run region on device
launchKernel(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,KernelArgsTy & KernelArgs,AsyncInfoTy & AsyncInfo)216 int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
217                                ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
218                                AsyncInfoTy &AsyncInfo) {
219   return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
220                             &KernelArgs, AsyncInfo);
221 }
222 
223 // Run region on device
printDeviceInfo()224 bool DeviceTy::printDeviceInfo() {
225   RTL->print_device_info(RTLDeviceID);
226   return true;
227 }
228 
229 // Whether data can be copied to DstDevice directly
isDataExchangable(const DeviceTy & DstDevice)230 bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
231   if (RTL != DstDevice.RTL)
232     return false;
233 
234   if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
235     return true;
236   return false;
237 }
238 
synchronize(AsyncInfoTy & AsyncInfo)239 int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
240   return RTL->synchronize(RTLDeviceID, AsyncInfo);
241 }
242 
queryAsync(AsyncInfoTy & AsyncInfo)243 int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) {
244   return RTL->query_async(RTLDeviceID, AsyncInfo);
245 }
246 
createEvent(void ** Event)247 int32_t DeviceTy::createEvent(void **Event) {
248   return RTL->create_event(RTLDeviceID, Event);
249 }
250 
recordEvent(void * Event,AsyncInfoTy & AsyncInfo)251 int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
252   return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
253 }
254 
waitEvent(void * Event,AsyncInfoTy & AsyncInfo)255 int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
256   return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
257 }
258 
syncEvent(void * Event)259 int32_t DeviceTy::syncEvent(void *Event) {
260   return RTL->sync_event(RTLDeviceID, Event);
261 }
262 
destroyEvent(void * Event)263 int32_t DeviceTy::destroyEvent(void *Event) {
264   return RTL->destroy_event(RTLDeviceID, Event);
265 }
266 
dumpOffloadEntries()267 void DeviceTy::dumpOffloadEntries() {
268   fprintf(stderr, "Device %i offload entries:\n", DeviceID);
269   for (auto &It : *DeviceOffloadEntries.getExclusiveAccessor()) {
270     const char *Kind = "kernel";
271     if (It.second.isLink())
272       Kind = "link";
273     else if (It.second.isGlobal())
274       Kind = "global var.";
275     fprintf(stderr, "  %11s: %s\n", Kind, It.second.getNameAsCStr());
276   }
277 }
278 
useAutoZeroCopy()279 bool DeviceTy::useAutoZeroCopy() {
280   if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY)
281     return false;
282   return RTL->use_auto_zero_copy(RTLDeviceID);
283 }
284