xref: /llvm-project/offload/src/interface.cpp (revision 92376c3ff5453cb954a614d368fa3d52d6d0fa99)
//===-------- interface.cpp - Target independent OpenMP target RTL --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//
13330d8983SJohannes Doerfert 
14330d8983SJohannes Doerfert #include "OpenMP/OMPT/Interface.h"
15ff12c006SJohannes Doerfert #include "OffloadPolicy.h"
16330d8983SJohannes Doerfert #include "OpenMP/OMPT/Callback.h"
17ff12c006SJohannes Doerfert #include "OpenMP/omp.h"
18330d8983SJohannes Doerfert #include "PluginManager.h"
19ff12c006SJohannes Doerfert #include "omptarget.h"
20330d8983SJohannes Doerfert #include "private.h"
21330d8983SJohannes Doerfert 
22330d8983SJohannes Doerfert #include "Shared/EnvironmentVar.h"
23330d8983SJohannes Doerfert #include "Shared/Profile.h"
24330d8983SJohannes Doerfert 
25330d8983SJohannes Doerfert #include "Utils/ExponentialBackoff.h"
26330d8983SJohannes Doerfert 
27330d8983SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
28330d8983SJohannes Doerfert 
29330d8983SJohannes Doerfert #include <cassert>
30330d8983SJohannes Doerfert #include <cstdint>
31330d8983SJohannes Doerfert #include <cstdio>
32330d8983SJohannes Doerfert #include <cstdlib>
33330d8983SJohannes Doerfert 
34330d8983SJohannes Doerfert #ifdef OMPT_SUPPORT
35330d8983SJohannes Doerfert using namespace llvm::omp::target::ompt;
36330d8983SJohannes Doerfert #endif
37330d8983SJohannes Doerfert 
38ff12c006SJohannes Doerfert // If offload is enabled, ensure that device DeviceID has been initialized.
39ff12c006SJohannes Doerfert //
40ff12c006SJohannes Doerfert // The return bool indicates if the offload is to the host device
41ff12c006SJohannes Doerfert // There are three possible results:
42ff12c006SJohannes Doerfert // - Return false if the taregt device is ready for offload
43ff12c006SJohannes Doerfert // - Return true without reporting a runtime error if offload is
44ff12c006SJohannes Doerfert //   disabled, perhaps because the initial device was specified.
45ff12c006SJohannes Doerfert // - Report a runtime error and return true.
46ff12c006SJohannes Doerfert //
47ff12c006SJohannes Doerfert // If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device.
48ff12c006SJohannes Doerfert // This step might be skipped if offload is disabled.
49ff12c006SJohannes Doerfert bool checkDevice(int64_t &DeviceID, ident_t *Loc) {
50ff12c006SJohannes Doerfert   if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) {
51ff12c006SJohannes Doerfert     DP("Offload is disabled\n");
52ff12c006SJohannes Doerfert     return true;
53ff12c006SJohannes Doerfert   }
54ff12c006SJohannes Doerfert 
55ff12c006SJohannes Doerfert   if (DeviceID == OFFLOAD_DEVICE_DEFAULT) {
56ff12c006SJohannes Doerfert     DeviceID = omp_get_default_device();
57ff12c006SJohannes Doerfert     DP("Use default device id %" PRId64 "\n", DeviceID);
58ff12c006SJohannes Doerfert   }
59ff12c006SJohannes Doerfert 
60ff12c006SJohannes Doerfert   // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669.
61ff12c006SJohannes Doerfert   if (omp_get_num_devices() == 0) {
62ff12c006SJohannes Doerfert     DP("omp_get_num_devices() == 0 but offload is manadatory\n");
63ff12c006SJohannes Doerfert     handleTargetOutcome(false, Loc);
64ff12c006SJohannes Doerfert     return true;
65ff12c006SJohannes Doerfert   }
66ff12c006SJohannes Doerfert 
67ff12c006SJohannes Doerfert   if (DeviceID == omp_get_initial_device()) {
68ff12c006SJohannes Doerfert     DP("Device is host (%" PRId64 "), returning as if offload is disabled\n",
69ff12c006SJohannes Doerfert        DeviceID);
70ff12c006SJohannes Doerfert     return true;
71ff12c006SJohannes Doerfert   }
72ff12c006SJohannes Doerfert   return false;
73ff12c006SJohannes Doerfert }
74ff12c006SJohannes Doerfert 
75330d8983SJohannes Doerfert ////////////////////////////////////////////////////////////////////////////////
76330d8983SJohannes Doerfert /// adds requires flags
77330d8983SJohannes Doerfert EXTERN void __tgt_register_requires(int64_t Flags) {
78330d8983SJohannes Doerfert   MESSAGE("The %s function has been removed. Old OpenMP requirements will not "
79330d8983SJohannes Doerfert           "be handled",
80330d8983SJohannes Doerfert           __PRETTY_FUNCTION__);
81330d8983SJohannes Doerfert }
82330d8983SJohannes Doerfert 
83330d8983SJohannes Doerfert EXTERN void __tgt_rtl_init() { initRuntime(); }
84330d8983SJohannes Doerfert EXTERN void __tgt_rtl_deinit() { deinitRuntime(); }
85330d8983SJohannes Doerfert 
86330d8983SJohannes Doerfert ////////////////////////////////////////////////////////////////////////////////
87330d8983SJohannes Doerfert /// adds a target shared library to the target execution image
88330d8983SJohannes Doerfert EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
89330d8983SJohannes Doerfert   initRuntime();
90330d8983SJohannes Doerfert   if (PM->delayRegisterLib(Desc))
91330d8983SJohannes Doerfert     return;
92330d8983SJohannes Doerfert 
93330d8983SJohannes Doerfert   PM->registerLib(Desc);
94330d8983SJohannes Doerfert }
95330d8983SJohannes Doerfert 
96330d8983SJohannes Doerfert ////////////////////////////////////////////////////////////////////////////////
97330d8983SJohannes Doerfert /// Initialize all available devices without registering any image
98330d8983SJohannes Doerfert EXTERN void __tgt_init_all_rtls() {
99330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
1007102592aSJohannes Doerfert   PM->initializeAllDevices();
101330d8983SJohannes Doerfert }
102330d8983SJohannes Doerfert 
103330d8983SJohannes Doerfert ////////////////////////////////////////////////////////////////////////////////
104330d8983SJohannes Doerfert /// unloads a target shared library
105330d8983SJohannes Doerfert EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
106330d8983SJohannes Doerfert   PM->unregisterLib(Desc);
107330d8983SJohannes Doerfert 
108330d8983SJohannes Doerfert   deinitRuntime();
109330d8983SJohannes Doerfert }
110330d8983SJohannes Doerfert 
111330d8983SJohannes Doerfert template <typename TargetAsyncInfoTy>
112330d8983SJohannes Doerfert static inline void
113330d8983SJohannes Doerfert targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
114330d8983SJohannes Doerfert            void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
115330d8983SJohannes Doerfert            map_var_info_t *ArgNames, void **ArgMappers,
116330d8983SJohannes Doerfert            TargetDataFuncPtrTy TargetDataFunction, const char *RegionTypeMsg,
117330d8983SJohannes Doerfert            const char *RegionName) {
118330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
119330d8983SJohannes Doerfert   static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
120330d8983SJohannes Doerfert                 "TargetAsyncInfoTy must be convertible to AsyncInfoTy.");
121330d8983SJohannes Doerfert 
122330d8983SJohannes Doerfert   TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
123330d8983SJohannes Doerfert                                    "NumArgs=" + std::to_string(ArgNum), Loc);
124330d8983SJohannes Doerfert 
125330d8983SJohannes Doerfert   DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
126330d8983SJohannes Doerfert      RegionName, DeviceId, ArgNum);
127330d8983SJohannes Doerfert 
128ff12c006SJohannes Doerfert   if (checkDevice(DeviceId, Loc)) {
129330d8983SJohannes Doerfert     DP("Not offloading to device %" PRId64 "\n", DeviceId);
130330d8983SJohannes Doerfert     return;
131330d8983SJohannes Doerfert   }
132330d8983SJohannes Doerfert 
133330d8983SJohannes Doerfert   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
134330d8983SJohannes Doerfert     printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
135330d8983SJohannes Doerfert                          RegionTypeMsg);
136330d8983SJohannes Doerfert #ifdef OMPTARGET_DEBUG
137330d8983SJohannes Doerfert   for (int I = 0; I < ArgNum; ++I) {
138330d8983SJohannes Doerfert     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
139330d8983SJohannes Doerfert        ", Type=0x%" PRIx64 ", Name=%s\n",
140330d8983SJohannes Doerfert        I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
141330d8983SJohannes Doerfert        (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
142330d8983SJohannes Doerfert   }
143330d8983SJohannes Doerfert #endif
144330d8983SJohannes Doerfert 
145330d8983SJohannes Doerfert   auto DeviceOrErr = PM->getDevice(DeviceId);
146330d8983SJohannes Doerfert   if (!DeviceOrErr)
147330d8983SJohannes Doerfert     FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
148330d8983SJohannes Doerfert 
149330d8983SJohannes Doerfert   TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
150330d8983SJohannes Doerfert   AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
151330d8983SJohannes Doerfert 
152330d8983SJohannes Doerfert   /// RAII to establish tool anchors before and after data begin / end / update
153330d8983SJohannes Doerfert   OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
154330d8983SJohannes Doerfert                         TargetDataFunction == targetDataEnd ||
155330d8983SJohannes Doerfert                         TargetDataFunction == targetDataUpdate) &&
156330d8983SJohannes Doerfert                        "Encountered unexpected TargetDataFunction during "
157330d8983SJohannes Doerfert                        "execution of targetData");
158330d8983SJohannes Doerfert                 auto CallbackFunctions =
159330d8983SJohannes Doerfert                     (TargetDataFunction == targetDataBegin)
160330d8983SJohannes Doerfert                         ? RegionInterface.getCallbacks<ompt_target_enter_data>()
161330d8983SJohannes Doerfert                     : (TargetDataFunction == targetDataEnd)
162330d8983SJohannes Doerfert                         ? RegionInterface.getCallbacks<ompt_target_exit_data>()
163330d8983SJohannes Doerfert                         : RegionInterface.getCallbacks<ompt_target_update>();
164330d8983SJohannes Doerfert                 InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
165330d8983SJohannes Doerfert                                              OMPT_GET_RETURN_ADDRESS);)
166330d8983SJohannes Doerfert 
167330d8983SJohannes Doerfert   int Rc = OFFLOAD_SUCCESS;
168330d8983SJohannes Doerfert   Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
169330d8983SJohannes Doerfert                           ArgTypes, ArgNames, ArgMappers, AsyncInfo,
170330d8983SJohannes Doerfert                           false /*FromMapper=*/);
171330d8983SJohannes Doerfert 
172330d8983SJohannes Doerfert   if (Rc == OFFLOAD_SUCCESS)
173330d8983SJohannes Doerfert     Rc = AsyncInfo.synchronize();
174330d8983SJohannes Doerfert 
175330d8983SJohannes Doerfert   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
176330d8983SJohannes Doerfert }
177330d8983SJohannes Doerfert 
178330d8983SJohannes Doerfert /// creates host-to-target data mapping, stores it in the
179330d8983SJohannes Doerfert /// libomptarget.so internal structure (an entry in a stack of data maps)
180330d8983SJohannes Doerfert /// and passes the data to the device.
181330d8983SJohannes Doerfert EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
182330d8983SJohannes Doerfert                                            int32_t ArgNum, void **ArgsBase,
183330d8983SJohannes Doerfert                                            void **Args, int64_t *ArgSizes,
184330d8983SJohannes Doerfert                                            int64_t *ArgTypes,
185330d8983SJohannes Doerfert                                            map_var_info_t *ArgNames,
186330d8983SJohannes Doerfert                                            void **ArgMappers) {
187330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
188330d8983SJohannes Doerfert   targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
189330d8983SJohannes Doerfert                           ArgTypes, ArgNames, ArgMappers, targetDataBegin,
190330d8983SJohannes Doerfert                           "Entering OpenMP data region with being_mapper",
191330d8983SJohannes Doerfert                           "begin");
192330d8983SJohannes Doerfert }
193330d8983SJohannes Doerfert 
194330d8983SJohannes Doerfert EXTERN void __tgt_target_data_begin_nowait_mapper(
195330d8983SJohannes Doerfert     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
196330d8983SJohannes Doerfert     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
197330d8983SJohannes Doerfert     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
198330d8983SJohannes Doerfert     void *NoAliasDepList) {
199330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
200330d8983SJohannes Doerfert   targetData<TaskAsyncInfoWrapperTy>(
201330d8983SJohannes Doerfert       Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
202330d8983SJohannes Doerfert       ArgMappers, targetDataBegin,
203330d8983SJohannes Doerfert       "Entering OpenMP data region with being_nowait_mapper", "begin");
204330d8983SJohannes Doerfert }
205330d8983SJohannes Doerfert 
206330d8983SJohannes Doerfert /// passes data from the target, releases target memory and destroys
207330d8983SJohannes Doerfert /// the host-target mapping (top entry from the stack of data maps)
208330d8983SJohannes Doerfert /// created by the last __tgt_target_data_begin.
209330d8983SJohannes Doerfert EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
210330d8983SJohannes Doerfert                                          int32_t ArgNum, void **ArgsBase,
211330d8983SJohannes Doerfert                                          void **Args, int64_t *ArgSizes,
212330d8983SJohannes Doerfert                                          int64_t *ArgTypes,
213330d8983SJohannes Doerfert                                          map_var_info_t *ArgNames,
214330d8983SJohannes Doerfert                                          void **ArgMappers) {
215330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
216330d8983SJohannes Doerfert   targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
217330d8983SJohannes Doerfert                           ArgTypes, ArgNames, ArgMappers, targetDataEnd,
218330d8983SJohannes Doerfert                           "Exiting OpenMP data region with end_mapper", "end");
219330d8983SJohannes Doerfert }
220330d8983SJohannes Doerfert 
221330d8983SJohannes Doerfert EXTERN void __tgt_target_data_end_nowait_mapper(
222330d8983SJohannes Doerfert     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
223330d8983SJohannes Doerfert     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
224330d8983SJohannes Doerfert     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
225330d8983SJohannes Doerfert     void *NoAliasDepList) {
226330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
227330d8983SJohannes Doerfert   targetData<TaskAsyncInfoWrapperTy>(
228330d8983SJohannes Doerfert       Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
229330d8983SJohannes Doerfert       ArgMappers, targetDataEnd,
230330d8983SJohannes Doerfert       "Exiting OpenMP data region with end_nowait_mapper", "end");
231330d8983SJohannes Doerfert }
232330d8983SJohannes Doerfert 
233330d8983SJohannes Doerfert EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
234330d8983SJohannes Doerfert                                             int32_t ArgNum, void **ArgsBase,
235330d8983SJohannes Doerfert                                             void **Args, int64_t *ArgSizes,
236330d8983SJohannes Doerfert                                             int64_t *ArgTypes,
237330d8983SJohannes Doerfert                                             map_var_info_t *ArgNames,
238330d8983SJohannes Doerfert                                             void **ArgMappers) {
239330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
240330d8983SJohannes Doerfert   targetData<AsyncInfoTy>(
241330d8983SJohannes Doerfert       Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
242330d8983SJohannes Doerfert       ArgMappers, targetDataUpdate,
243330d8983SJohannes Doerfert       "Updating data within the OpenMP data region with update_mapper",
244330d8983SJohannes Doerfert       "update");
245330d8983SJohannes Doerfert }
246330d8983SJohannes Doerfert 
247330d8983SJohannes Doerfert EXTERN void __tgt_target_data_update_nowait_mapper(
248330d8983SJohannes Doerfert     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
249330d8983SJohannes Doerfert     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
250330d8983SJohannes Doerfert     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
251330d8983SJohannes Doerfert     void *NoAliasDepList) {
252330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
253330d8983SJohannes Doerfert   targetData<TaskAsyncInfoWrapperTy>(
254330d8983SJohannes Doerfert       Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
255330d8983SJohannes Doerfert       ArgMappers, targetDataUpdate,
256330d8983SJohannes Doerfert       "Updating data within the OpenMP data region with update_nowait_mapper",
257330d8983SJohannes Doerfert       "update");
258330d8983SJohannes Doerfert }
259330d8983SJohannes Doerfert 
260330d8983SJohannes Doerfert static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs,
261330d8983SJohannes Doerfert                                        KernelArgsTy &LocalKernelArgs,
262330d8983SJohannes Doerfert                                        int32_t NumTeams, int32_t ThreadLimit) {
263330d8983SJohannes Doerfert   if (KernelArgs->Version > OMP_KERNEL_ARG_VERSION)
264330d8983SJohannes Doerfert     DP("Unexpected ABI version: %u\n", KernelArgs->Version);
265330d8983SJohannes Doerfert 
266330d8983SJohannes Doerfert   uint32_t UpgradedVersion = KernelArgs->Version;
267330d8983SJohannes Doerfert   if (KernelArgs->Version < OMP_KERNEL_ARG_VERSION) {
268330d8983SJohannes Doerfert     // The upgraded version will be based on the kernel launch environment.
269330d8983SJohannes Doerfert     if (KernelArgs->Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
270330d8983SJohannes Doerfert       UpgradedVersion = OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR - 1;
271330d8983SJohannes Doerfert     else
272330d8983SJohannes Doerfert       UpgradedVersion = OMP_KERNEL_ARG_VERSION;
273330d8983SJohannes Doerfert   }
274330d8983SJohannes Doerfert   if (UpgradedVersion != KernelArgs->Version) {
275330d8983SJohannes Doerfert     LocalKernelArgs.Version = UpgradedVersion;
276330d8983SJohannes Doerfert     LocalKernelArgs.NumArgs = KernelArgs->NumArgs;
277330d8983SJohannes Doerfert     LocalKernelArgs.ArgBasePtrs = KernelArgs->ArgBasePtrs;
278330d8983SJohannes Doerfert     LocalKernelArgs.ArgPtrs = KernelArgs->ArgPtrs;
279330d8983SJohannes Doerfert     LocalKernelArgs.ArgSizes = KernelArgs->ArgSizes;
280330d8983SJohannes Doerfert     LocalKernelArgs.ArgTypes = KernelArgs->ArgTypes;
281330d8983SJohannes Doerfert     LocalKernelArgs.ArgNames = KernelArgs->ArgNames;
282330d8983SJohannes Doerfert     LocalKernelArgs.ArgMappers = KernelArgs->ArgMappers;
283330d8983SJohannes Doerfert     LocalKernelArgs.Tripcount = KernelArgs->Tripcount;
284330d8983SJohannes Doerfert     LocalKernelArgs.Flags = KernelArgs->Flags;
285330d8983SJohannes Doerfert     LocalKernelArgs.DynCGroupMem = 0;
286330d8983SJohannes Doerfert     LocalKernelArgs.NumTeams[0] = NumTeams;
287*92376c3fSShilei Tian     LocalKernelArgs.NumTeams[1] = 1;
288*92376c3fSShilei Tian     LocalKernelArgs.NumTeams[2] = 1;
289330d8983SJohannes Doerfert     LocalKernelArgs.ThreadLimit[0] = ThreadLimit;
290*92376c3fSShilei Tian     LocalKernelArgs.ThreadLimit[1] = 1;
291*92376c3fSShilei Tian     LocalKernelArgs.ThreadLimit[2] = 1;
292330d8983SJohannes Doerfert     return &LocalKernelArgs;
293330d8983SJohannes Doerfert   }
294330d8983SJohannes Doerfert 
295*92376c3fSShilei Tian   // FIXME: This is a WA to "calibrate" the bad work done in the front end.
296*92376c3fSShilei Tian   // Delete this ugly code after the front end emits proper values.
297*92376c3fSShilei Tian   auto CorrectMultiDim = [](uint32_t(&Val)[3]) {
298*92376c3fSShilei Tian     if (Val[1] == 0)
299*92376c3fSShilei Tian       Val[1] = 1;
300*92376c3fSShilei Tian     if (Val[2] == 0)
301*92376c3fSShilei Tian       Val[2] = 1;
302*92376c3fSShilei Tian   };
303*92376c3fSShilei Tian   CorrectMultiDim(KernelArgs->ThreadLimit);
304*92376c3fSShilei Tian   CorrectMultiDim(KernelArgs->NumTeams);
305*92376c3fSShilei Tian 
306330d8983SJohannes Doerfert   return KernelArgs;
307330d8983SJohannes Doerfert }
308330d8983SJohannes Doerfert 
309330d8983SJohannes Doerfert template <typename TargetAsyncInfoTy>
310330d8983SJohannes Doerfert static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
311330d8983SJohannes Doerfert                                int32_t ThreadLimit, void *HostPtr,
312330d8983SJohannes Doerfert                                KernelArgsTy *KernelArgs) {
313330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
314330d8983SJohannes Doerfert   static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
315330d8983SJohannes Doerfert                 "Target AsyncInfoTy must be convertible to AsyncInfoTy.");
316330d8983SJohannes Doerfert   DP("Entering target region for device %" PRId64 " with entry point " DPxMOD
317330d8983SJohannes Doerfert      "\n",
318330d8983SJohannes Doerfert      DeviceId, DPxPTR(HostPtr));
319330d8983SJohannes Doerfert 
320ff12c006SJohannes Doerfert   if (checkDevice(DeviceId, Loc)) {
321330d8983SJohannes Doerfert     DP("Not offloading to device %" PRId64 "\n", DeviceId);
322330d8983SJohannes Doerfert     return OMP_TGT_FAIL;
323330d8983SJohannes Doerfert   }
324330d8983SJohannes Doerfert 
325330d8983SJohannes Doerfert   bool IsTeams = NumTeams != -1;
326330d8983SJohannes Doerfert   if (!IsTeams)
327330d8983SJohannes Doerfert     KernelArgs->NumTeams[0] = NumTeams = 1;
328330d8983SJohannes Doerfert 
329330d8983SJohannes Doerfert   // Auto-upgrade kernel args version 1 to 2.
330330d8983SJohannes Doerfert   KernelArgsTy LocalKernelArgs;
331330d8983SJohannes Doerfert   KernelArgs =
332330d8983SJohannes Doerfert       upgradeKernelArgs(KernelArgs, LocalKernelArgs, NumTeams, ThreadLimit);
333330d8983SJohannes Doerfert 
334330d8983SJohannes Doerfert   TIMESCOPE_WITH_DETAILS_AND_IDENT(
335330d8983SJohannes Doerfert       "Runtime: target exe",
336330d8983SJohannes Doerfert       "NumTeams=" + std::to_string(NumTeams) +
337330d8983SJohannes Doerfert           ";NumArgs=" + std::to_string(KernelArgs->NumArgs),
338330d8983SJohannes Doerfert       Loc);
339330d8983SJohannes Doerfert 
340330d8983SJohannes Doerfert   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
341330d8983SJohannes Doerfert     printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs,
342330d8983SJohannes Doerfert                          KernelArgs->ArgSizes, KernelArgs->ArgTypes,
343330d8983SJohannes Doerfert                          KernelArgs->ArgNames, "Entering OpenMP kernel");
344330d8983SJohannes Doerfert #ifdef OMPTARGET_DEBUG
345330d8983SJohannes Doerfert   for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
346330d8983SJohannes Doerfert     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
347330d8983SJohannes Doerfert        ", Type=0x%" PRIx64 ", Name=%s\n",
348330d8983SJohannes Doerfert        I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]),
349330d8983SJohannes Doerfert        KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I],
350330d8983SJohannes Doerfert        (KernelArgs->ArgNames)
351330d8983SJohannes Doerfert            ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
352330d8983SJohannes Doerfert            : "unknown");
353330d8983SJohannes Doerfert   }
354330d8983SJohannes Doerfert #endif
355330d8983SJohannes Doerfert 
356330d8983SJohannes Doerfert   auto DeviceOrErr = PM->getDevice(DeviceId);
357330d8983SJohannes Doerfert   if (!DeviceOrErr)
358330d8983SJohannes Doerfert     FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
359330d8983SJohannes Doerfert 
360330d8983SJohannes Doerfert   TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
361330d8983SJohannes Doerfert   AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
362330d8983SJohannes Doerfert   /// RAII to establish tool anchors before and after target region
363330d8983SJohannes Doerfert   OMPT_IF_BUILT(InterfaceRAII TargetRAII(
364330d8983SJohannes Doerfert                     RegionInterface.getCallbacks<ompt_target>(), DeviceId,
365330d8983SJohannes Doerfert                     /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
366330d8983SJohannes Doerfert 
367330d8983SJohannes Doerfert   int Rc = OFFLOAD_SUCCESS;
368330d8983SJohannes Doerfert   Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
369330d8983SJohannes Doerfert   { // required to show syncronization
370330d8983SJohannes Doerfert     TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize", "", Loc);
371330d8983SJohannes Doerfert     if (Rc == OFFLOAD_SUCCESS)
372330d8983SJohannes Doerfert       Rc = AsyncInfo.synchronize();
373330d8983SJohannes Doerfert 
374330d8983SJohannes Doerfert     handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
375330d8983SJohannes Doerfert     assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
376330d8983SJohannes Doerfert   }
377330d8983SJohannes Doerfert   return OMP_TGT_SUCCESS;
378330d8983SJohannes Doerfert }
379330d8983SJohannes Doerfert 
380330d8983SJohannes Doerfert /// Implements a kernel entry that executes the target region on the specified
381330d8983SJohannes Doerfert /// device.
382330d8983SJohannes Doerfert ///
383330d8983SJohannes Doerfert /// \param Loc Source location associated with this target region.
384330d8983SJohannes Doerfert /// \param DeviceId The device to execute this region, -1 indicated the default.
385330d8983SJohannes Doerfert /// \param NumTeams Number of teams to launch the region with, -1 indicates a
386330d8983SJohannes Doerfert ///                 non-teams region and 0 indicates it was unspecified.
387330d8983SJohannes Doerfert /// \param ThreadLimit Limit to the number of threads to use in the kernel
388330d8983SJohannes Doerfert ///                    launch, 0 indicates it was unspecified.
389330d8983SJohannes Doerfert /// \param HostPtr  The pointer to the host function registered with the kernel.
390330d8983SJohannes Doerfert /// \param Args     All arguments to this kernel launch (see struct definition).
391330d8983SJohannes Doerfert EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
392330d8983SJohannes Doerfert                                int32_t ThreadLimit, void *HostPtr,
393330d8983SJohannes Doerfert                                KernelArgsTy *KernelArgs) {
394330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
395330d8983SJohannes Doerfert   if (KernelArgs->Flags.NoWait)
396330d8983SJohannes Doerfert     return targetKernel<TaskAsyncInfoWrapperTy>(
397330d8983SJohannes Doerfert         Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs);
398330d8983SJohannes Doerfert   return targetKernel<AsyncInfoTy>(Loc, DeviceId, NumTeams, ThreadLimit,
399330d8983SJohannes Doerfert                                    HostPtr, KernelArgs);
400330d8983SJohannes Doerfert }
401330d8983SJohannes Doerfert 
402330d8983SJohannes Doerfert /// Activates the record replay mechanism.
403330d8983SJohannes Doerfert /// \param DeviceId The device identifier to execute the target region.
404330d8983SJohannes Doerfert /// \param MemorySize The number of bytes to be (pre-)allocated
405330d8983SJohannes Doerfert ///                   by the bump allocator
406330d8983SJohannes Doerfert /// /param IsRecord Activates the record replay mechanism in
407330d8983SJohannes Doerfert ///                 'record' mode or 'replay' mode.
408330d8983SJohannes Doerfert /// /param SaveOutput Store the device memory after kernel
409330d8983SJohannes Doerfert ///                   execution on persistent storage
410330d8983SJohannes Doerfert EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
411330d8983SJohannes Doerfert                                         void *VAddr, bool IsRecord,
412330d8983SJohannes Doerfert                                         bool SaveOutput,
413330d8983SJohannes Doerfert                                         uint64_t &ReqPtrArgOffset) {
414330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
415330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
416330d8983SJohannes Doerfert   auto DeviceOrErr = PM->getDevice(DeviceId);
417330d8983SJohannes Doerfert   if (!DeviceOrErr)
418330d8983SJohannes Doerfert     FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
419330d8983SJohannes Doerfert 
420330d8983SJohannes Doerfert   [[maybe_unused]] int Rc = target_activate_rr(
421330d8983SJohannes Doerfert       *DeviceOrErr, MemorySize, VAddr, IsRecord, SaveOutput, ReqPtrArgOffset);
422330d8983SJohannes Doerfert   assert(Rc == OFFLOAD_SUCCESS &&
423330d8983SJohannes Doerfert          "__tgt_activate_record_replay unexpected failure!");
424330d8983SJohannes Doerfert   return OMP_TGT_SUCCESS;
425330d8983SJohannes Doerfert }
426330d8983SJohannes Doerfert 
427330d8983SJohannes Doerfert /// Implements a target kernel entry that replays a pre-recorded kernel.
428330d8983SJohannes Doerfert /// \param Loc Source location associated with this target region (unused).
429330d8983SJohannes Doerfert /// \param DeviceId The device identifier to execute the target region.
430330d8983SJohannes Doerfert /// \param HostPtr A pointer to an address that uniquely identifies the kernel.
431330d8983SJohannes Doerfert /// \param DeviceMemory A pointer to an array storing device memory data to move
432330d8983SJohannes Doerfert ///                     prior to kernel execution.
433330d8983SJohannes Doerfert /// \param DeviceMemorySize The size of the above device memory data in bytes.
434330d8983SJohannes Doerfert /// \param TgtArgs An array of pointers of the pre-recorded target kernel
435330d8983SJohannes Doerfert ///                arguments.
436330d8983SJohannes Doerfert /// \param TgtOffsets An array of pointers of the pre-recorded target kernel
437330d8983SJohannes Doerfert ///                   argument offsets.
438330d8983SJohannes Doerfert /// \param NumArgs The number of kernel arguments.
439330d8983SJohannes Doerfert /// \param NumTeams Number of teams to launch the target region with.
440330d8983SJohannes Doerfert /// \param ThreadLimit Limit to the number of threads to use in kernel
441330d8983SJohannes Doerfert ///                    execution.
442330d8983SJohannes Doerfert /// \param LoopTripCount The pre-recorded value of the loop tripcount, if any.
443330d8983SJohannes Doerfert /// \return OMP_TGT_SUCCESS on success, OMP_TGT_FAIL on failure.
444330d8983SJohannes Doerfert EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
445330d8983SJohannes Doerfert                                       void *HostPtr, void *DeviceMemory,
446330d8983SJohannes Doerfert                                       int64_t DeviceMemorySize, void **TgtArgs,
447330d8983SJohannes Doerfert                                       ptrdiff_t *TgtOffsets, int32_t NumArgs,
448330d8983SJohannes Doerfert                                       int32_t NumTeams, int32_t ThreadLimit,
449330d8983SJohannes Doerfert                                       uint64_t LoopTripCount) {
450330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
451330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
452ff12c006SJohannes Doerfert   if (checkDevice(DeviceId, Loc)) {
453330d8983SJohannes Doerfert     DP("Not offloading to device %" PRId64 "\n", DeviceId);
454330d8983SJohannes Doerfert     return OMP_TGT_FAIL;
455330d8983SJohannes Doerfert   }
456330d8983SJohannes Doerfert   auto DeviceOrErr = PM->getDevice(DeviceId);
457330d8983SJohannes Doerfert   if (!DeviceOrErr)
458330d8983SJohannes Doerfert     FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
459330d8983SJohannes Doerfert 
460330d8983SJohannes Doerfert   /// RAII to establish tool anchors before and after target region
461330d8983SJohannes Doerfert   OMPT_IF_BUILT(InterfaceRAII TargetRAII(
462330d8983SJohannes Doerfert                     RegionInterface.getCallbacks<ompt_target>(), DeviceId,
463330d8983SJohannes Doerfert                     /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
464330d8983SJohannes Doerfert 
465330d8983SJohannes Doerfert   AsyncInfoTy AsyncInfo(*DeviceOrErr);
466330d8983SJohannes Doerfert   int Rc = target_replay(Loc, *DeviceOrErr, HostPtr, DeviceMemory,
467330d8983SJohannes Doerfert                          DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs,
468330d8983SJohannes Doerfert                          NumTeams, ThreadLimit, LoopTripCount, AsyncInfo);
469330d8983SJohannes Doerfert   if (Rc == OFFLOAD_SUCCESS)
470330d8983SJohannes Doerfert     Rc = AsyncInfo.synchronize();
471330d8983SJohannes Doerfert   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
472330d8983SJohannes Doerfert   assert(Rc == OFFLOAD_SUCCESS &&
473330d8983SJohannes Doerfert          "__tgt_target_kernel_replay unexpected failure!");
474330d8983SJohannes Doerfert   return OMP_TGT_SUCCESS;
475330d8983SJohannes Doerfert }
476330d8983SJohannes Doerfert 
477330d8983SJohannes Doerfert // Get the current number of components for a user-defined mapper.
478330d8983SJohannes Doerfert EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
479330d8983SJohannes Doerfert   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
480330d8983SJohannes Doerfert   int64_t Size = MapperComponentsPtr->Components.size();
481330d8983SJohannes Doerfert   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
482330d8983SJohannes Doerfert      DPxPTR(RtMapperHandle), Size);
483330d8983SJohannes Doerfert   return Size;
484330d8983SJohannes Doerfert }
485330d8983SJohannes Doerfert 
486330d8983SJohannes Doerfert // Push back one component for a user-defined mapper.
487330d8983SJohannes Doerfert EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
488330d8983SJohannes Doerfert                                         void *Begin, int64_t Size, int64_t Type,
489330d8983SJohannes Doerfert                                         void *Name) {
490330d8983SJohannes Doerfert   DP("__tgt_push_mapper_component(Handle=" DPxMOD
491330d8983SJohannes Doerfert      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
492330d8983SJohannes Doerfert      ", Type=0x%" PRIx64 ", Name=%s).\n",
493330d8983SJohannes Doerfert      DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
494330d8983SJohannes Doerfert      (Name) ? getNameFromMapping(Name).c_str() : "unknown");
495330d8983SJohannes Doerfert   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
496330d8983SJohannes Doerfert   MapperComponentsPtr->Components.push_back(
497330d8983SJohannes Doerfert       MapComponentInfoTy(Base, Begin, Size, Type, Name));
498330d8983SJohannes Doerfert }
499330d8983SJohannes Doerfert 
500330d8983SJohannes Doerfert EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
501330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
502330d8983SJohannes Doerfert   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
503330d8983SJohannes Doerfert   InfoLevel.store(NewInfoLevel);
504330d8983SJohannes Doerfert }
505330d8983SJohannes Doerfert 
506330d8983SJohannes Doerfert EXTERN int __tgt_print_device_info(int64_t DeviceId) {
507330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
508330d8983SJohannes Doerfert   auto DeviceOrErr = PM->getDevice(DeviceId);
509330d8983SJohannes Doerfert   if (!DeviceOrErr)
510330d8983SJohannes Doerfert     FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
511330d8983SJohannes Doerfert 
512330d8983SJohannes Doerfert   return DeviceOrErr->printDeviceInfo();
513330d8983SJohannes Doerfert }
514330d8983SJohannes Doerfert 
515330d8983SJohannes Doerfert EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
516330d8983SJohannes Doerfert   assert(PM && "Runtime not initialized");
517330d8983SJohannes Doerfert   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
518330d8983SJohannes Doerfert 
519330d8983SJohannes Doerfert   if (!AsyncHandle || !*AsyncHandle) {
520330d8983SJohannes Doerfert     FATAL_MESSAGE0(
521330d8983SJohannes Doerfert         1, "Receive an invalid async handle from the current OpenMP task. Is "
522330d8983SJohannes Doerfert            "this a target nowait region?\n");
523330d8983SJohannes Doerfert   }
524330d8983SJohannes Doerfert 
525330d8983SJohannes Doerfert   // Exponential backoff tries to optimally decide if a thread should just query
526330d8983SJohannes Doerfert   // for the device operations (work/spin wait on them) or block until they are
527330d8983SJohannes Doerfert   // completed (use device side blocking mechanism). This allows the runtime to
528330d8983SJohannes Doerfert   // adapt itself when there are a lot of long-running target regions in-flight.
529330d8983SJohannes Doerfert   static thread_local utils::ExponentialBackoff QueryCounter(
530330d8983SJohannes Doerfert       Int64Envar("OMPTARGET_QUERY_COUNT_MAX", 10),
531330d8983SJohannes Doerfert       Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD", 5),
532330d8983SJohannes Doerfert       Envar<float>("OMPTARGET_QUERY_COUNT_BACKOFF_FACTOR", 0.5f));
533330d8983SJohannes Doerfert 
534330d8983SJohannes Doerfert   auto *AsyncInfo = (AsyncInfoTy *)*AsyncHandle;
535330d8983SJohannes Doerfert 
536330d8983SJohannes Doerfert   // If the thread is actively waiting on too many target nowait regions, we
537330d8983SJohannes Doerfert   // should use the blocking sync type.
538330d8983SJohannes Doerfert   if (QueryCounter.isAboveThreshold())
539330d8983SJohannes Doerfert     AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING;
540330d8983SJohannes Doerfert 
541330d8983SJohannes Doerfert   if (AsyncInfo->synchronize())
542330d8983SJohannes Doerfert     FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n");
543330d8983SJohannes Doerfert   // If there are device operations still pending, return immediately without
544330d8983SJohannes Doerfert   // deallocating the handle and increase the current thread query count.
545330d8983SJohannes Doerfert   if (!AsyncInfo->isDone()) {
546330d8983SJohannes Doerfert     QueryCounter.increment();
547330d8983SJohannes Doerfert     return;
548330d8983SJohannes Doerfert   }
549330d8983SJohannes Doerfert 
550330d8983SJohannes Doerfert   // When a thread successfully completes a target nowait region, we
551330d8983SJohannes Doerfert   // exponentially backoff its query counter by the query factor.
552330d8983SJohannes Doerfert   QueryCounter.decrement();
553330d8983SJohannes Doerfert 
554330d8983SJohannes Doerfert   // Delete the handle and unset it from the OpenMP task data.
555330d8983SJohannes Doerfert   delete AsyncInfo;
556330d8983SJohannes Doerfert   *AsyncHandle = nullptr;
557330d8983SJohannes Doerfert }
558