1330d8983SJohannes Doerfert //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
2330d8983SJohannes Doerfert //
3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6330d8983SJohannes Doerfert //
7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
8330d8983SJohannes Doerfert //
9330d8983SJohannes Doerfert // Functionality for managing devices that are handled by RTL plugins.
10330d8983SJohannes Doerfert //
11330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
12330d8983SJohannes Doerfert
13330d8983SJohannes Doerfert #include "device.h"
14330d8983SJohannes Doerfert #include "OffloadEntry.h"
15330d8983SJohannes Doerfert #include "OpenMP/Mapping.h"
16330d8983SJohannes Doerfert #include "OpenMP/OMPT/Callback.h"
17330d8983SJohannes Doerfert #include "OpenMP/OMPT/Interface.h"
18330d8983SJohannes Doerfert #include "PluginManager.h"
19330d8983SJohannes Doerfert #include "Shared/APITypes.h"
20330d8983SJohannes Doerfert #include "Shared/Debug.h"
21330d8983SJohannes Doerfert #include "omptarget.h"
22330d8983SJohannes Doerfert #include "private.h"
23330d8983SJohannes Doerfert #include "rtl.h"
24330d8983SJohannes Doerfert
25330d8983SJohannes Doerfert #include "Shared/EnvironmentVar.h"
26330d8983SJohannes Doerfert #include "llvm/Support/Error.h"
27330d8983SJohannes Doerfert
28330d8983SJohannes Doerfert #include <cassert>
29330d8983SJohannes Doerfert #include <climits>
30330d8983SJohannes Doerfert #include <cstdint>
31330d8983SJohannes Doerfert #include <cstdio>
32330d8983SJohannes Doerfert #include <mutex>
33330d8983SJohannes Doerfert #include <string>
34330d8983SJohannes Doerfert #include <thread>
35330d8983SJohannes Doerfert
36330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
37330d8983SJohannes Doerfert using namespace llvm::omp::target::ompt;
38330d8983SJohannes Doerfert #endif
39330d8983SJohannes Doerfert
addEventIfNecessary(DeviceTy & Device,AsyncInfoTy & AsyncInfo) const40330d8983SJohannes Doerfert int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
41330d8983SJohannes Doerfert AsyncInfoTy &AsyncInfo) const {
42330d8983SJohannes Doerfert // First, check if the user disabled atomic map transfer/malloc/dealloc.
43330d8983SJohannes Doerfert if (!MappingConfig::get().UseEventsForAtomicTransfers)
44330d8983SJohannes Doerfert return OFFLOAD_SUCCESS;
45330d8983SJohannes Doerfert
46330d8983SJohannes Doerfert void *Event = getEvent();
47330d8983SJohannes Doerfert bool NeedNewEvent = Event == nullptr;
48330d8983SJohannes Doerfert if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
49330d8983SJohannes Doerfert REPORT("Failed to create event\n");
50330d8983SJohannes Doerfert return OFFLOAD_FAIL;
51330d8983SJohannes Doerfert }
52330d8983SJohannes Doerfert
53330d8983SJohannes Doerfert // We cannot assume the event should not be nullptr because we don't
54330d8983SJohannes Doerfert // know if the target support event. But if a target doesn't,
55330d8983SJohannes Doerfert // recordEvent should always return success.
56330d8983SJohannes Doerfert if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
57330d8983SJohannes Doerfert REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
58330d8983SJohannes Doerfert return OFFLOAD_FAIL;
59330d8983SJohannes Doerfert }
60330d8983SJohannes Doerfert
61330d8983SJohannes Doerfert if (NeedNewEvent)
62330d8983SJohannes Doerfert setEvent(Event);
63330d8983SJohannes Doerfert
64330d8983SJohannes Doerfert return OFFLOAD_SUCCESS;
65330d8983SJohannes Doerfert }
66330d8983SJohannes Doerfert
DeviceTy(GenericPluginTy * RTL,int32_t DeviceID,int32_t RTLDeviceID)67fa9e90f5SJoseph Huber DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
68330d8983SJohannes Doerfert : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
69330d8983SJohannes Doerfert MappingInfo(*this) {}
70330d8983SJohannes Doerfert
~DeviceTy()71330d8983SJohannes Doerfert DeviceTy::~DeviceTy() {
72330d8983SJohannes Doerfert if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
73330d8983SJohannes Doerfert return;
74330d8983SJohannes Doerfert
75330d8983SJohannes Doerfert ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
76330d8983SJohannes Doerfert dumpTargetPointerMappings(&Loc, *this);
77330d8983SJohannes Doerfert }
78330d8983SJohannes Doerfert
init()79330d8983SJohannes Doerfert llvm::Error DeviceTy::init() {
80*3abd3d6eSJoseph Huber int32_t Ret = RTL->init_device(RTLDeviceID);
81330d8983SJohannes Doerfert if (Ret != OFFLOAD_SUCCESS)
82330d8983SJohannes Doerfert return llvm::createStringError(llvm::inconvertibleErrorCode(),
83330d8983SJohannes Doerfert "Failed to initialize device %d\n",
84330d8983SJohannes Doerfert DeviceID);
85330d8983SJohannes Doerfert
86330d8983SJohannes Doerfert // Enables recording kernels if set.
87330d8983SJohannes Doerfert BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD", false);
88330d8983SJohannes Doerfert if (OMPX_RecordKernel) {
89330d8983SJohannes Doerfert // Enables saving the device memory kernel output post execution if set.
90330d8983SJohannes Doerfert BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT", false);
91330d8983SJohannes Doerfert
92330d8983SJohannes Doerfert uint64_t ReqPtrArgOffset;
93330d8983SJohannes Doerfert RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true,
94330d8983SJohannes Doerfert OMPX_ReplaySaveOutput, ReqPtrArgOffset);
95330d8983SJohannes Doerfert }
96330d8983SJohannes Doerfert
97330d8983SJohannes Doerfert return llvm::Error::success();
98330d8983SJohannes Doerfert }
99330d8983SJohannes Doerfert
100330d8983SJohannes Doerfert // Load binary to device.
101330d8983SJohannes Doerfert llvm::Expected<__tgt_device_binary>
loadBinary(__tgt_device_image * Img)102330d8983SJohannes Doerfert DeviceTy::loadBinary(__tgt_device_image *Img) {
103330d8983SJohannes Doerfert __tgt_device_binary Binary;
104330d8983SJohannes Doerfert
105330d8983SJohannes Doerfert if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS)
106330d8983SJohannes Doerfert return llvm::createStringError(llvm::inconvertibleErrorCode(),
107330d8983SJohannes Doerfert "Failed to load binary %p", Img);
108330d8983SJohannes Doerfert return Binary;
109330d8983SJohannes Doerfert }
110330d8983SJohannes Doerfert
allocData(int64_t Size,void * HstPtr,int32_t Kind)111330d8983SJohannes Doerfert void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
112330d8983SJohannes Doerfert /// RAII to establish tool anchors before and after data allocation
113330d8983SJohannes Doerfert void *TargetPtr = nullptr;
114330d8983SJohannes Doerfert OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
115330d8983SJohannes Doerfert RegionInterface.getCallbacks<ompt_target_data_alloc>(),
116330d8983SJohannes Doerfert DeviceID, HstPtr, &TargetPtr, Size,
117330d8983SJohannes Doerfert /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
118330d8983SJohannes Doerfert
119330d8983SJohannes Doerfert TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
120330d8983SJohannes Doerfert return TargetPtr;
121330d8983SJohannes Doerfert }
122330d8983SJohannes Doerfert
deleteData(void * TgtAllocBegin,int32_t Kind)123330d8983SJohannes Doerfert int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
124330d8983SJohannes Doerfert /// RAII to establish tool anchors before and after data deletion
125330d8983SJohannes Doerfert OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII(
126330d8983SJohannes Doerfert RegionInterface.getCallbacks<ompt_target_data_delete>(),
127330d8983SJohannes Doerfert DeviceID, TgtAllocBegin,
128330d8983SJohannes Doerfert /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
129330d8983SJohannes Doerfert
130330d8983SJohannes Doerfert return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
131330d8983SJohannes Doerfert }
132330d8983SJohannes Doerfert
133330d8983SJohannes Doerfert // Submit data to device
submitData(void * TgtPtrBegin,void * HstPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo,HostDataToTargetTy * Entry,MappingInfoTy::HDTTMapAccessorTy * HDTTMapPtr)134330d8983SJohannes Doerfert int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
135330d8983SJohannes Doerfert AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry,
136330d8983SJohannes Doerfert MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) {
137330d8983SJohannes Doerfert if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER)
138330d8983SJohannes Doerfert MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/true,
139330d8983SJohannes Doerfert Entry, HDTTMapPtr);
140330d8983SJohannes Doerfert
141330d8983SJohannes Doerfert /// RAII to establish tool anchors before and after data submit
142330d8983SJohannes Doerfert OMPT_IF_BUILT(
143330d8983SJohannes Doerfert InterfaceRAII TargetDataSubmitRAII(
144330d8983SJohannes Doerfert RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
145330d8983SJohannes Doerfert omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
146330d8983SJohannes Doerfert /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
147330d8983SJohannes Doerfert
148330d8983SJohannes Doerfert return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
149330d8983SJohannes Doerfert AsyncInfo);
150330d8983SJohannes Doerfert }
151330d8983SJohannes Doerfert
152330d8983SJohannes Doerfert // Retrieve data from device
retrieveData(void * HstPtrBegin,void * TgtPtrBegin,int64_t Size,AsyncInfoTy & AsyncInfo,HostDataToTargetTy * Entry,MappingInfoTy::HDTTMapAccessorTy * HDTTMapPtr)153330d8983SJohannes Doerfert int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
154330d8983SJohannes Doerfert int64_t Size, AsyncInfoTy &AsyncInfo,
155330d8983SJohannes Doerfert HostDataToTargetTy *Entry,
156330d8983SJohannes Doerfert MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) {
157330d8983SJohannes Doerfert if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER)
158330d8983SJohannes Doerfert MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/false,
159330d8983SJohannes Doerfert Entry, HDTTMapPtr);
160330d8983SJohannes Doerfert
161330d8983SJohannes Doerfert /// RAII to establish tool anchors before and after data retrieval
162330d8983SJohannes Doerfert OMPT_IF_BUILT(
163330d8983SJohannes Doerfert InterfaceRAII TargetDataRetrieveRAII(
164330d8983SJohannes Doerfert RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
165330d8983SJohannes Doerfert DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
166330d8983SJohannes Doerfert /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
167330d8983SJohannes Doerfert
168330d8983SJohannes Doerfert return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
169330d8983SJohannes Doerfert AsyncInfo);
170330d8983SJohannes Doerfert }
171330d8983SJohannes Doerfert
172330d8983SJohannes Doerfert // Copy data from current device to destination device directly
dataExchange(void * SrcPtr,DeviceTy & DstDev,void * DstPtr,int64_t Size,AsyncInfoTy & AsyncInfo)173330d8983SJohannes Doerfert int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
174330d8983SJohannes Doerfert int64_t Size, AsyncInfoTy &AsyncInfo) {
175330d8983SJohannes Doerfert /// RAII to establish tool anchors before and after data exchange
176330d8983SJohannes Doerfert /// Note: Despite the fact that this is a data exchange, we use 'from_device'
177330d8983SJohannes Doerfert /// operation enum (w.r.t. ompt_target_data_op_t) as there is currently
178330d8983SJohannes Doerfert /// no better alternative. It is still possible to distinguish this
179330d8983SJohannes Doerfert /// scenario from a real data retrieve by checking if both involved
180330d8983SJohannes Doerfert /// device numbers are less than omp_get_num_devices().
181330d8983SJohannes Doerfert OMPT_IF_BUILT(
182330d8983SJohannes Doerfert InterfaceRAII TargetDataExchangeRAII(
183330d8983SJohannes Doerfert RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
184330d8983SJohannes Doerfert RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
185330d8983SJohannes Doerfert /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
186330d8983SJohannes Doerfert if (!AsyncInfo) {
187330d8983SJohannes Doerfert return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
188330d8983SJohannes Doerfert Size);
189330d8983SJohannes Doerfert }
190330d8983SJohannes Doerfert return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
191330d8983SJohannes Doerfert DstPtr, Size, AsyncInfo);
192330d8983SJohannes Doerfert }
193330d8983SJohannes Doerfert
notifyDataMapped(void * HstPtr,int64_t Size)194330d8983SJohannes Doerfert int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
195330d8983SJohannes Doerfert DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n",
196330d8983SJohannes Doerfert DPxPTR(HstPtr), Size);
197330d8983SJohannes Doerfert
198330d8983SJohannes Doerfert if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) {
199330d8983SJohannes Doerfert REPORT("Notifiying about data mapping failed.\n");
200330d8983SJohannes Doerfert return OFFLOAD_FAIL;
201330d8983SJohannes Doerfert }
202330d8983SJohannes Doerfert return OFFLOAD_SUCCESS;
203330d8983SJohannes Doerfert }
204330d8983SJohannes Doerfert
notifyDataUnmapped(void * HstPtr)205330d8983SJohannes Doerfert int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
206330d8983SJohannes Doerfert DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr));
207330d8983SJohannes Doerfert
208330d8983SJohannes Doerfert if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) {
209330d8983SJohannes Doerfert REPORT("Notifiying about data unmapping failed.\n");
210330d8983SJohannes Doerfert return OFFLOAD_FAIL;
211330d8983SJohannes Doerfert }
212330d8983SJohannes Doerfert return OFFLOAD_SUCCESS;
213330d8983SJohannes Doerfert }
214330d8983SJohannes Doerfert
215330d8983SJohannes Doerfert // Run region on device
launchKernel(void * TgtEntryPtr,void ** TgtVarsPtr,ptrdiff_t * TgtOffsets,KernelArgsTy & KernelArgs,AsyncInfoTy & AsyncInfo)216330d8983SJohannes Doerfert int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
217330d8983SJohannes Doerfert ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
218330d8983SJohannes Doerfert AsyncInfoTy &AsyncInfo) {
219330d8983SJohannes Doerfert return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
220330d8983SJohannes Doerfert &KernelArgs, AsyncInfo);
221330d8983SJohannes Doerfert }
222330d8983SJohannes Doerfert
223330d8983SJohannes Doerfert // Run region on device
printDeviceInfo()224330d8983SJohannes Doerfert bool DeviceTy::printDeviceInfo() {
225330d8983SJohannes Doerfert RTL->print_device_info(RTLDeviceID);
226330d8983SJohannes Doerfert return true;
227330d8983SJohannes Doerfert }
228330d8983SJohannes Doerfert
229330d8983SJohannes Doerfert // Whether data can be copied to DstDevice directly
isDataExchangable(const DeviceTy & DstDevice)230330d8983SJohannes Doerfert bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
231330d8983SJohannes Doerfert if (RTL != DstDevice.RTL)
232330d8983SJohannes Doerfert return false;
233330d8983SJohannes Doerfert
234330d8983SJohannes Doerfert if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
235330d8983SJohannes Doerfert return true;
236330d8983SJohannes Doerfert return false;
237330d8983SJohannes Doerfert }
238330d8983SJohannes Doerfert
synchronize(AsyncInfoTy & AsyncInfo)239330d8983SJohannes Doerfert int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
240330d8983SJohannes Doerfert return RTL->synchronize(RTLDeviceID, AsyncInfo);
241330d8983SJohannes Doerfert }
242330d8983SJohannes Doerfert
queryAsync(AsyncInfoTy & AsyncInfo)243330d8983SJohannes Doerfert int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) {
244330d8983SJohannes Doerfert return RTL->query_async(RTLDeviceID, AsyncInfo);
245330d8983SJohannes Doerfert }
246330d8983SJohannes Doerfert
createEvent(void ** Event)247330d8983SJohannes Doerfert int32_t DeviceTy::createEvent(void **Event) {
248330d8983SJohannes Doerfert return RTL->create_event(RTLDeviceID, Event);
249330d8983SJohannes Doerfert }
250330d8983SJohannes Doerfert
recordEvent(void * Event,AsyncInfoTy & AsyncInfo)251330d8983SJohannes Doerfert int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
252330d8983SJohannes Doerfert return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
253330d8983SJohannes Doerfert }
254330d8983SJohannes Doerfert
waitEvent(void * Event,AsyncInfoTy & AsyncInfo)255330d8983SJohannes Doerfert int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
256330d8983SJohannes Doerfert return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
257330d8983SJohannes Doerfert }
258330d8983SJohannes Doerfert
syncEvent(void * Event)259330d8983SJohannes Doerfert int32_t DeviceTy::syncEvent(void *Event) {
260330d8983SJohannes Doerfert return RTL->sync_event(RTLDeviceID, Event);
261330d8983SJohannes Doerfert }
262330d8983SJohannes Doerfert
destroyEvent(void * Event)263330d8983SJohannes Doerfert int32_t DeviceTy::destroyEvent(void *Event) {
264330d8983SJohannes Doerfert return RTL->destroy_event(RTLDeviceID, Event);
265330d8983SJohannes Doerfert }
266330d8983SJohannes Doerfert
dumpOffloadEntries()267330d8983SJohannes Doerfert void DeviceTy::dumpOffloadEntries() {
268330d8983SJohannes Doerfert fprintf(stderr, "Device %i offload entries:\n", DeviceID);
269330d8983SJohannes Doerfert for (auto &It : *DeviceOffloadEntries.getExclusiveAccessor()) {
270330d8983SJohannes Doerfert const char *Kind = "kernel";
271330d8983SJohannes Doerfert if (It.second.isLink())
272330d8983SJohannes Doerfert Kind = "link";
273330d8983SJohannes Doerfert else if (It.second.isGlobal())
274330d8983SJohannes Doerfert Kind = "global var.";
275330d8983SJohannes Doerfert fprintf(stderr, " %11s: %s\n", Kind, It.second.getNameAsCStr());
276330d8983SJohannes Doerfert }
277330d8983SJohannes Doerfert }
278330d8983SJohannes Doerfert
useAutoZeroCopy()279330d8983SJohannes Doerfert bool DeviceTy::useAutoZeroCopy() {
280*3abd3d6eSJoseph Huber if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY)
281*3abd3d6eSJoseph Huber return false;
282330d8983SJohannes Doerfert return RTL->use_auto_zero_copy(RTLDeviceID);
283330d8983SJohannes Doerfert }
284