xref: /llvm-project/offload/include/omptarget.h (revision 80525dfcde5bf8aae6ab6b0810124ba502de6096)
//===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//
13330d8983SJohannes Doerfert 
14330d8983SJohannes Doerfert #ifndef _OMPTARGET_H_
15330d8983SJohannes Doerfert #define _OMPTARGET_H_
16330d8983SJohannes Doerfert 
#include "Shared/APITypes.h"
#include "Shared/Environment.h"
#include "Shared/SourceInfo.h"

#include "OpenMP/InternalTypes.h"

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <functional>
#include <type_traits>
#include <utility>

#include "llvm/ADT/SmallVector.h"
30330d8983SJohannes Doerfert 
/// Status codes: zero for success, all bits set (-1 as an int) for failure.
#define OFFLOAD_SUCCESS (0)
#define OFFLOAD_FAIL (~0)

/// Sentinel device id (presumably "use the default device" - confirm against
/// the callers). Parenthesized so the negative literal always expands as a
/// single operand.
#define OFFLOAD_DEVICE_DEFAULT (-1)
35330d8983SJohannes Doerfert 
// Don't format out enums and structs.
// clang-format off
38330d8983SJohannes Doerfert 
/// Return flags of the __tgt_target_XXX public APIs.
enum __tgt_target_return_t : int {
  /// Successful offload executed on a target device.
  OMP_TGT_SUCCESS = 0,
  /// Offload may not execute on the requested target device. This scenario
  /// can be caused by the device being unavailable or unsupported, as
  /// described in the Execution Model in the specification. This status may
  /// not be used for target device execution failure, which should be handled
  /// internally in libomptarget.
  OMP_TGT_FAIL = ~0
};
50330d8983SJohannes Doerfert 
/// Data attributes for each data reference used in an OpenMP target region.
/// The values are bit flags and may be combined with bitwise OR.
enum tgt_map_type {
  // No flags
  OMP_TGT_MAPTYPE_NONE            = 0x000,
  // copy data from host to device
  OMP_TGT_MAPTYPE_TO              = 0x001,
  // copy data from device to host
  OMP_TGT_MAPTYPE_FROM            = 0x002,
  // copy regardless of the reference count
  OMP_TGT_MAPTYPE_ALWAYS          = 0x004,
  // force unmapping of data
  OMP_TGT_MAPTYPE_DELETE          = 0x008,
  // map the pointer as well as the pointee
  OMP_TGT_MAPTYPE_PTR_AND_OBJ     = 0x010,
  // pass device base address to kernel
  OMP_TGT_MAPTYPE_TARGET_PARAM    = 0x020,
  // return base device address of mapped data
  OMP_TGT_MAPTYPE_RETURN_PARAM    = 0x040,
  // private variable - not mapped
  OMP_TGT_MAPTYPE_PRIVATE         = 0x080,
  // copy by value - not mapped
  OMP_TGT_MAPTYPE_LITERAL         = 0x100,
  // mapping is implicit
  OMP_TGT_MAPTYPE_IMPLICIT        = 0x200,
  // 'close' map-type modifier: hint to allocate the data close to the device
  OMP_TGT_MAPTYPE_CLOSE           = 0x400,
  // runtime error if not already allocated
  OMP_TGT_MAPTYPE_PRESENT         = 0x1000,
  // use a separate reference counter so that the data cannot be unmapped within
  // the structured region
  // This is an OpenMP extension for the sake of OpenACC support.
  OMP_TGT_MAPTYPE_OMPX_HOLD       = 0x2000,
  // descriptor for non-contiguous target-update
  OMP_TGT_MAPTYPE_NON_CONTIG      = 0x100000000000,
  // member of struct, member given by [16 MSBs] - 1
  OMP_TGT_MAPTYPE_MEMBER_OF       = 0xffff000000000000
};
88330d8983SJohannes Doerfert 
/// Flags for offload entries.
enum OpenMPOffloadingDeclareTargetFlags {
  /// Mark the entry global as having a 'link' attribute.
  OMP_DECLARE_TARGET_LINK = 0x01,
  /// Mark the entry global as being an indirectly callable function.
  OMP_DECLARE_TARGET_INDIRECT = 0x08,
  /// This is an entry corresponding to a requirement to be registered.
  OMP_REGISTER_REQUIRES = 0x10,
};
98330d8983SJohannes Doerfert 
/// Kinds of target memory allocation. The enumerators are numbered
/// consecutively starting at zero and stored as int32_t.
enum TargetAllocTy : int32_t {
  TARGET_ALLOC_DEVICE = 0,
  TARGET_ALLOC_HOST,
  TARGET_ALLOC_SHARED,
  TARGET_ALLOC_DEFAULT,
  /// The allocation will not block on other streams.
  TARGET_ALLOC_DEVICE_NON_BLOCKING,
};
107330d8983SJohannes Doerfert 
108330d8983SJohannes Doerfert inline KernelArgsTy CTorDTorKernelArgs = {1,       0,       nullptr,   nullptr,
109330d8983SJohannes Doerfert 	     nullptr, nullptr, nullptr,   nullptr,
110*80525dfcSJohannes Doerfert 	     0,      {0,0,0},       {1, 0, 0}, {1, 0, 0}, 0};
111330d8983SJohannes Doerfert 
112330d8983SJohannes Doerfert struct DeviceTy;
113330d8983SJohannes Doerfert 
114330d8983SJohannes Doerfert /// The libomptarget wrapper around a __tgt_async_info object directly
115330d8983SJohannes Doerfert /// associated with a libomptarget layer device. RAII semantics to avoid
116330d8983SJohannes Doerfert /// mistakes.
117330d8983SJohannes Doerfert class AsyncInfoTy {
118330d8983SJohannes Doerfert public:
119330d8983SJohannes Doerfert   enum class SyncTy { BLOCKING, NON_BLOCKING };
120330d8983SJohannes Doerfert 
121330d8983SJohannes Doerfert private:
122330d8983SJohannes Doerfert   /// Locations we used in (potentially) asynchronous calls which should live
123330d8983SJohannes Doerfert   /// as long as this AsyncInfoTy object.
124330d8983SJohannes Doerfert   std::deque<void *> BufferLocations;
125330d8983SJohannes Doerfert 
126330d8983SJohannes Doerfert   /// Post-processing operations executed after a successful synchronization.
127330d8983SJohannes Doerfert   /// \note the post-processing function should return OFFLOAD_SUCCESS or
128330d8983SJohannes Doerfert   /// OFFLOAD_FAIL appropriately.
129330d8983SJohannes Doerfert   using PostProcFuncTy = std::function<int()>;
130330d8983SJohannes Doerfert   llvm::SmallVector<PostProcFuncTy> PostProcessingFunctions;
131330d8983SJohannes Doerfert 
132330d8983SJohannes Doerfert   __tgt_async_info AsyncInfo;
133330d8983SJohannes Doerfert   DeviceTy &Device;
134330d8983SJohannes Doerfert 
135330d8983SJohannes Doerfert public:
136330d8983SJohannes Doerfert   /// Synchronization method to be used.
137330d8983SJohannes Doerfert   SyncTy SyncType;
138330d8983SJohannes Doerfert 
139330d8983SJohannes Doerfert   AsyncInfoTy(DeviceTy &Device, SyncTy SyncType = SyncTy::BLOCKING)
140330d8983SJohannes Doerfert       : Device(Device), SyncType(SyncType) {}
141330d8983SJohannes Doerfert   ~AsyncInfoTy() { synchronize(); }
142330d8983SJohannes Doerfert 
143330d8983SJohannes Doerfert   /// Implicit conversion to the __tgt_async_info which is used in the
144330d8983SJohannes Doerfert   /// plugin interface.
145330d8983SJohannes Doerfert   operator __tgt_async_info *() { return &AsyncInfo; }
146330d8983SJohannes Doerfert 
147330d8983SJohannes Doerfert   /// Synchronize all pending actions.
148330d8983SJohannes Doerfert   ///
149330d8983SJohannes Doerfert   /// \note synchronization will be performance in a blocking or non-blocking
150330d8983SJohannes Doerfert   /// manner, depending on the SyncType.
151330d8983SJohannes Doerfert   ///
152330d8983SJohannes Doerfert   /// \note if the operations are completed, the registered post-processing
153330d8983SJohannes Doerfert   /// functions will be executed once and unregistered afterwards.
154330d8983SJohannes Doerfert   ///
155330d8983SJohannes Doerfert   /// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately.
156330d8983SJohannes Doerfert   int synchronize();
157330d8983SJohannes Doerfert 
158330d8983SJohannes Doerfert   /// Return a void* reference with a lifetime that is at least as long as this
159330d8983SJohannes Doerfert   /// AsyncInfoTy object. The location can be used as intermediate buffer.
160330d8983SJohannes Doerfert   void *&getVoidPtrLocation();
161330d8983SJohannes Doerfert 
162330d8983SJohannes Doerfert   /// Check if all asynchronous operations are completed.
163330d8983SJohannes Doerfert   ///
164330d8983SJohannes Doerfert   /// \note only a lightweight check. If needed, use synchronize() to query the
165330d8983SJohannes Doerfert   /// status of AsyncInfo before checking.
166330d8983SJohannes Doerfert   ///
167330d8983SJohannes Doerfert   /// \returns true if there is no pending asynchronous operations, false
168330d8983SJohannes Doerfert   /// otherwise.
169330d8983SJohannes Doerfert   bool isDone() const;
170330d8983SJohannes Doerfert 
171330d8983SJohannes Doerfert   /// Add a new post-processing function to be executed after synchronization.
172330d8983SJohannes Doerfert   ///
173330d8983SJohannes Doerfert   /// \param[in] Function is a templated function (e.g., function pointers,
174330d8983SJohannes Doerfert   /// lambdas, std::function) that can be convertible to a PostProcFuncTy (i.e.,
175330d8983SJohannes Doerfert   /// it must have int() as its function signature).
176330d8983SJohannes Doerfert   template <typename FuncTy> void addPostProcessingFunction(FuncTy &&Function) {
177330d8983SJohannes Doerfert     static_assert(std::is_convertible_v<FuncTy, PostProcFuncTy>,
178330d8983SJohannes Doerfert                   "Invalid post-processing function type. Please check "
179330d8983SJohannes Doerfert                   "function signature!");
180330d8983SJohannes Doerfert     PostProcessingFunctions.emplace_back(Function);
181330d8983SJohannes Doerfert   }
182330d8983SJohannes Doerfert 
183330d8983SJohannes Doerfert private:
184330d8983SJohannes Doerfert   /// Run all the post-processing functions sequentially.
185330d8983SJohannes Doerfert   ///
186330d8983SJohannes Doerfert   /// \note after a successful execution, all previously registered functions
187330d8983SJohannes Doerfert   /// are unregistered.
188330d8983SJohannes Doerfert   ///
189330d8983SJohannes Doerfert   /// \returns OFFLOAD_FAIL if any post-processing function failed,
190330d8983SJohannes Doerfert   /// OFFLOAD_SUCCESS otherwise.
191330d8983SJohannes Doerfert   int32_t runPostProcessing();
192330d8983SJohannes Doerfert 
193330d8983SJohannes Doerfert   /// Check if the internal asynchronous info queue is empty or not.
194330d8983SJohannes Doerfert   ///
195330d8983SJohannes Doerfert   /// \returns true if empty, false otherwise.
196330d8983SJohannes Doerfert   bool isQueueEmpty() const;
197330d8983SJohannes Doerfert };
198330d8983SJohannes Doerfert 
199330d8983SJohannes Doerfert // Wrapper for task stored async info objects.
200330d8983SJohannes Doerfert class TaskAsyncInfoWrapperTy {
201330d8983SJohannes Doerfert   // Invalid GTID as defined by libomp; keep in sync
202330d8983SJohannes Doerfert   static constexpr int KMP_GTID_DNE = -2;
203330d8983SJohannes Doerfert 
204330d8983SJohannes Doerfert   const int ExecThreadID = KMP_GTID_DNE;
205330d8983SJohannes Doerfert   AsyncInfoTy LocalAsyncInfo;
206330d8983SJohannes Doerfert   AsyncInfoTy *AsyncInfo = &LocalAsyncInfo;
207330d8983SJohannes Doerfert   void **TaskAsyncInfoPtr = nullptr;
208330d8983SJohannes Doerfert 
209330d8983SJohannes Doerfert public:
210330d8983SJohannes Doerfert   TaskAsyncInfoWrapperTy(DeviceTy &Device)
211330d8983SJohannes Doerfert       : ExecThreadID(__kmpc_global_thread_num(NULL)), LocalAsyncInfo(Device) {
212330d8983SJohannes Doerfert     // If we failed to acquired the current global thread id, we cannot
213330d8983SJohannes Doerfert     // re-enqueue the current task. Thus we should use the local blocking async
214330d8983SJohannes Doerfert     // info.
215330d8983SJohannes Doerfert     if (ExecThreadID == KMP_GTID_DNE)
216330d8983SJohannes Doerfert       return;
217330d8983SJohannes Doerfert 
218330d8983SJohannes Doerfert     // Only tasks with an assigned task team can be re-enqueue and thus can
219330d8983SJohannes Doerfert     // use the non-blocking synchronization scheme. Thus we should use the local
220330d8983SJohannes Doerfert     // blocking async info, if we don´t have one.
221330d8983SJohannes Doerfert     if (!__kmpc_omp_has_task_team(ExecThreadID))
222330d8983SJohannes Doerfert       return;
223330d8983SJohannes Doerfert 
224330d8983SJohannes Doerfert     // Acquire a pointer to the AsyncInfo stored inside the current task being
225330d8983SJohannes Doerfert     // executed.
226330d8983SJohannes Doerfert     TaskAsyncInfoPtr = __kmpc_omp_get_target_async_handle_ptr(ExecThreadID);
227330d8983SJohannes Doerfert 
228330d8983SJohannes Doerfert     // If we cannot acquire such pointer, fallback to using the local blocking
229330d8983SJohannes Doerfert     // async info.
230330d8983SJohannes Doerfert     if (!TaskAsyncInfoPtr)
231330d8983SJohannes Doerfert       return;
232330d8983SJohannes Doerfert 
233330d8983SJohannes Doerfert     // When creating a new task async info, the task handle must always be
234330d8983SJohannes Doerfert     // invalid. We must never overwrite any task async handle and there should
235330d8983SJohannes Doerfert     // never be any valid handle store inside the task at this point.
236330d8983SJohannes Doerfert     assert((*TaskAsyncInfoPtr) == nullptr &&
237330d8983SJohannes Doerfert            "Task async handle is not empty when dispatching new device "
238330d8983SJohannes Doerfert            "operations. The handle was not cleared properly or "
239330d8983SJohannes Doerfert            "__tgt_target_nowait_query should have been called!");
240330d8983SJohannes Doerfert 
241330d8983SJohannes Doerfert     // If no valid async handle is present, a new AsyncInfo will be allocated
242330d8983SJohannes Doerfert     // and stored in the current task.
243330d8983SJohannes Doerfert     AsyncInfo = new AsyncInfoTy(Device, AsyncInfoTy::SyncTy::NON_BLOCKING);
244330d8983SJohannes Doerfert     *TaskAsyncInfoPtr = (void *)AsyncInfo;
245330d8983SJohannes Doerfert   }
246330d8983SJohannes Doerfert 
247330d8983SJohannes Doerfert   ~TaskAsyncInfoWrapperTy() {
248330d8983SJohannes Doerfert     // Local async info destruction is automatically handled by ~AsyncInfoTy.
249330d8983SJohannes Doerfert     if (AsyncInfo == &LocalAsyncInfo)
250330d8983SJohannes Doerfert       return;
251330d8983SJohannes Doerfert 
252330d8983SJohannes Doerfert     // If the are device operations still pending, return immediately without
253330d8983SJohannes Doerfert     // deallocating the handle.
254330d8983SJohannes Doerfert     if (!AsyncInfo->isDone())
255330d8983SJohannes Doerfert       return;
256330d8983SJohannes Doerfert 
257330d8983SJohannes Doerfert     // Delete the handle and unset it from the OpenMP task data.
258330d8983SJohannes Doerfert     delete AsyncInfo;
259330d8983SJohannes Doerfert     *TaskAsyncInfoPtr = nullptr;
260330d8983SJohannes Doerfert   }
261330d8983SJohannes Doerfert 
262330d8983SJohannes Doerfert   operator AsyncInfoTy &() { return *AsyncInfo; }
263330d8983SJohannes Doerfert };
264330d8983SJohannes Doerfert 
/// This struct is a record of non-contiguous information: three 64-bit
/// unsigned fields.
/// NOTE(review): units and per-dimension meaning of the fields are not
/// visible here - confirm against the code that consumes these records.
struct __tgt_target_non_contig {
  uint64_t Offset;
  uint64_t Count;
  uint64_t Stride;
};
271330d8983SJohannes Doerfert 
272330d8983SJohannes Doerfert #ifdef __cplusplus
273330d8983SJohannes Doerfert extern "C" {
274330d8983SJohannes Doerfert #endif
275330d8983SJohannes Doerfert 
/// Device query, device memory and pointer-association entry points.
void ompx_dump_mapping_tables(void);
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
                      size_t DstOffset, size_t SrcOffset, int DstDevice,
                      int SrcDevice);
int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                           int NumDims, const size_t *Volume,
                           const size_t *DstOffsets, const size_t *SrcOffsets,
                           const size_t *DstDimensions,
                           const size_t *SrcDimensions, int DstDevice,
                           int SrcDevice);
void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum);
int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                             size_t Size, size_t DeviceOffset, int DeviceNum);
int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);
296330d8983SJohannes Doerfert 
/// Explicit target memory allocators
/// Using the llvm_ prefix until they become part of the OpenMP standard.
void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);

/// Explicit target memory deallocators
/// Using the llvm_ prefix until they become part of the OpenMP standard.
void llvm_omp_target_free_device(void *DevicePtr, int DeviceNum);
void llvm_omp_target_free_host(void *DevicePtr, int DeviceNum);
void llvm_omp_target_free_shared(void *DevicePtr, int DeviceNum);

/// Dummy target so we have a symbol for generating host fallback.
void *llvm_omp_target_dynamic_shared_alloc(void);
311330d8983SJohannes Doerfert 
312330d8983SJohannes Doerfert /// add the clauses of the requires directives in a given file
313330d8983SJohannes Doerfert void __tgt_register_requires(int64_t Flags);
314330d8983SJohannes Doerfert 
315330d8983SJohannes Doerfert /// Initializes the runtime library.
316330d8983SJohannes Doerfert void __tgt_rtl_init();
317330d8983SJohannes Doerfert 
318330d8983SJohannes Doerfert /// Deinitializes the runtime library.
319330d8983SJohannes Doerfert void __tgt_rtl_deinit();
320330d8983SJohannes Doerfert 
321330d8983SJohannes Doerfert /// adds a target shared library to the target execution image
322330d8983SJohannes Doerfert void __tgt_register_lib(__tgt_bin_desc *Desc);
323330d8983SJohannes Doerfert 
324330d8983SJohannes Doerfert /// Initialize all RTLs at once
325330d8983SJohannes Doerfert void __tgt_init_all_rtls();
326330d8983SJohannes Doerfert 
327330d8983SJohannes Doerfert /// removes a target shared library from the target execution image
328330d8983SJohannes Doerfert void __tgt_unregister_lib(__tgt_bin_desc *Desc);
329330d8983SJohannes Doerfert 
330330d8983SJohannes Doerfert // creates the host to target data mapping, stores it in the
331330d8983SJohannes Doerfert // libomptarget.so internal structure (an entry in a stack of data maps) and
332330d8983SJohannes Doerfert // passes the data to the device;
333330d8983SJohannes Doerfert void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
334330d8983SJohannes Doerfert                              void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
335330d8983SJohannes Doerfert void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
336330d8983SJohannes Doerfert                                     void **ArgsBase, void **Args,
337330d8983SJohannes Doerfert                                     int64_t *ArgSizes, int64_t *ArgTypes,
338330d8983SJohannes Doerfert                                     int32_t DepNum, void *DepList,
339330d8983SJohannes Doerfert                                     int32_t NoAliasDepNum,
340330d8983SJohannes Doerfert                                     void *NoAliasDepList);
341330d8983SJohannes Doerfert void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
342330d8983SJohannes Doerfert                                     int32_t ArgNum, void **ArgsBase,
343330d8983SJohannes Doerfert                                     void **Args, int64_t *ArgSizes,
344330d8983SJohannes Doerfert                                     int64_t *ArgTypes, map_var_info_t *ArgNames,
345330d8983SJohannes Doerfert                                     void **ArgMappers);
346330d8983SJohannes Doerfert void __tgt_target_data_begin_nowait_mapper(
347330d8983SJohannes Doerfert     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
348330d8983SJohannes Doerfert     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
349330d8983SJohannes Doerfert     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
350330d8983SJohannes Doerfert     void *NoAliasDepList);
351330d8983SJohannes Doerfert 
352330d8983SJohannes Doerfert // passes data from the target, release target memory and destroys the
353330d8983SJohannes Doerfert // host-target mapping (top entry from the stack of data maps) created by
354330d8983SJohannes Doerfert // the last __tgt_target_data_begin
355330d8983SJohannes Doerfert void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
356330d8983SJohannes Doerfert                            void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
357330d8983SJohannes Doerfert void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
358330d8983SJohannes Doerfert                                   void **ArgsBase, void **Args,
359330d8983SJohannes Doerfert                                   int64_t *ArgSizes, int64_t *ArgTypes,
360330d8983SJohannes Doerfert                                   int32_t DepNum, void *DepList,
361330d8983SJohannes Doerfert                                   int32_t NoAliasDepNum, void *NoAliasDepList);
362330d8983SJohannes Doerfert void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
363330d8983SJohannes Doerfert                                   int32_t ArgNum, void **ArgsBase, void **Args,
364330d8983SJohannes Doerfert                                   int64_t *ArgSizes, int64_t *ArgTypes,
365330d8983SJohannes Doerfert                                   map_var_info_t *ArgNames, void **ArgMappers);
366330d8983SJohannes Doerfert void __tgt_target_data_end_nowait_mapper(
367330d8983SJohannes Doerfert     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
368330d8983SJohannes Doerfert     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
369330d8983SJohannes Doerfert     void **ArgMappers, int32_t depNum, void *depList, int32_t NoAliasDepNum,
370330d8983SJohannes Doerfert     void *NoAliasDepList);
371330d8983SJohannes Doerfert 
372330d8983SJohannes Doerfert /// passes data to/from the target
373330d8983SJohannes Doerfert void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
374330d8983SJohannes Doerfert                               void **Args, int64_t *ArgSizes,
375330d8983SJohannes Doerfert                               int64_t *ArgTypes);
376330d8983SJohannes Doerfert void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum,
377330d8983SJohannes Doerfert                                      void **ArgsBase, void **Args,
378330d8983SJohannes Doerfert                                      int64_t *ArgSizes, int64_t *ArgTypes,
379330d8983SJohannes Doerfert                                      int32_t DepNum, void *DepList,
380330d8983SJohannes Doerfert                                      int32_t NoAliasDepNum,
381330d8983SJohannes Doerfert                                      void *NoAliasDepList);
382330d8983SJohannes Doerfert void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
383330d8983SJohannes Doerfert                                      int32_t ArgNum, void **ArgsBase,
384330d8983SJohannes Doerfert                                      void **Args, int64_t *ArgSizes,
385330d8983SJohannes Doerfert                                      int64_t *ArgTypes,
386330d8983SJohannes Doerfert                                      map_var_info_t *ArgNames,
387330d8983SJohannes Doerfert                                      void **ArgMappers);
388330d8983SJohannes Doerfert void __tgt_target_data_update_nowait_mapper(
389330d8983SJohannes Doerfert     ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
390330d8983SJohannes Doerfert     void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
391330d8983SJohannes Doerfert     void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
392330d8983SJohannes Doerfert     void *NoAliasDepList);
393330d8983SJohannes Doerfert 
394330d8983SJohannes Doerfert // Performs the same actions as data_begin in case ArgNum is non-zero
395330d8983SJohannes Doerfert // and initiates run of offloaded region on target platform; if ArgNum
396330d8983SJohannes Doerfert // is non-zero after the region execution is done it also performs the
397330d8983SJohannes Doerfert // same action as data_end above. The following types are used; this
398330d8983SJohannes Doerfert // function returns 0 if it was able to transfer the execution to a
399330d8983SJohannes Doerfert // target and an int different from zero otherwise.
400330d8983SJohannes Doerfert int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
401330d8983SJohannes Doerfert                         int32_t ThreadLimit, void *HostPtr, KernelArgsTy *Args);
402330d8983SJohannes Doerfert 
403330d8983SJohannes Doerfert // Non-blocking synchronization for target nowait regions. This function
404330d8983SJohannes Doerfert // acquires the asynchronous context from task data of the current task being
405330d8983SJohannes Doerfert // executed and tries to query for the completion of its operations. If the
406330d8983SJohannes Doerfert // operations are still pending, the function returns immediately. If the
407330d8983SJohannes Doerfert // operations are completed, all the post-processing procedures stored in the
408330d8983SJohannes Doerfert // asynchronous context are executed and the context is removed from the task
409330d8983SJohannes Doerfert // data.
410330d8983SJohannes Doerfert void __tgt_target_nowait_query(void **AsyncHandle);
411330d8983SJohannes Doerfert 
412330d8983SJohannes Doerfert /// Executes a target kernel by replaying recorded kernel arguments and
413330d8983SJohannes Doerfert /// device memory.
414330d8983SJohannes Doerfert int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, void *HostPtr,
415330d8983SJohannes Doerfert                                void *DeviceMemory, int64_t DeviceMemorySize,
416330d8983SJohannes Doerfert                                void **TgtArgs, ptrdiff_t *TgtOffsets,
417330d8983SJohannes Doerfert                                int32_t NumArgs, int32_t NumTeams,
418330d8983SJohannes Doerfert                                int32_t ThreadLimit, uint64_t LoopTripCount);
419330d8983SJohannes Doerfert 
420330d8983SJohannes Doerfert void __tgt_set_info_flag(uint32_t);
421330d8983SJohannes Doerfert 
422330d8983SJohannes Doerfert int __tgt_print_device_info(int64_t DeviceId);
423330d8983SJohannes Doerfert 
424330d8983SJohannes Doerfert int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
425330d8983SJohannes Doerfert                                  void *VAddr, bool IsRecord, bool SaveOutput,
426330d8983SJohannes Doerfert                                  uint64_t &ReqPtrArgOffset);
427330d8983SJohannes Doerfert 
428330d8983SJohannes Doerfert #ifdef __cplusplus
429330d8983SJohannes Doerfert }
430330d8983SJohannes Doerfert #endif
431330d8983SJohannes Doerfert 
432330d8983SJohannes Doerfert #ifdef __cplusplus
433330d8983SJohannes Doerfert #define EXTERN extern "C"
434330d8983SJohannes Doerfert #else
435330d8983SJohannes Doerfert #define EXTERN extern
436330d8983SJohannes Doerfert #endif
437330d8983SJohannes Doerfert 
438330d8983SJohannes Doerfert #endif // _OMPTARGET_H_
439