1330d8983SJohannes Doerfert //===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===// 2330d8983SJohannes Doerfert // 3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information. 5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6330d8983SJohannes Doerfert // 7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===// 8330d8983SJohannes Doerfert // 9330d8983SJohannes Doerfert // Interface to be used by Clang during the codegen of a 10330d8983SJohannes Doerfert // target region. 11330d8983SJohannes Doerfert // 12330d8983SJohannes Doerfert //===----------------------------------------------------------------------===// 13330d8983SJohannes Doerfert 14330d8983SJohannes Doerfert #ifndef _OMPTARGET_H_ 15330d8983SJohannes Doerfert #define _OMPTARGET_H_ 16330d8983SJohannes Doerfert 17330d8983SJohannes Doerfert #include "Shared/APITypes.h" 18330d8983SJohannes Doerfert #include "Shared/Environment.h" 19330d8983SJohannes Doerfert #include "Shared/SourceInfo.h" 20330d8983SJohannes Doerfert 21330d8983SJohannes Doerfert #include "OpenMP/InternalTypes.h" 22330d8983SJohannes Doerfert 23330d8983SJohannes Doerfert #include <cstddef> 24330d8983SJohannes Doerfert #include <cstdint> 25330d8983SJohannes Doerfert #include <deque> 26330d8983SJohannes Doerfert #include <functional> 27330d8983SJohannes Doerfert #include <type_traits> 28330d8983SJohannes Doerfert 29330d8983SJohannes Doerfert #include "llvm/ADT/SmallVector.h" 30330d8983SJohannes Doerfert 31330d8983SJohannes Doerfert #define OFFLOAD_SUCCESS (0) 32330d8983SJohannes Doerfert #define OFFLOAD_FAIL (~0) 33330d8983SJohannes Doerfert 34330d8983SJohannes Doerfert #define OFFLOAD_DEVICE_DEFAULT -1 35330d8983SJohannes Doerfert 36330d8983SJohannes Doerfert // Don't format out enums and structs. 
// clang-format off

/// Return flags of the __tgt_target_XXX public APIs.
enum __tgt_target_return_t : int {
  /// Successful offload executed on a target device.
  OMP_TGT_SUCCESS = 0,
  /// Offload may not execute on the requested target device.
  /// This scenario can be caused by the device not being available or being
  /// unsupported, as described in the Execution Model in the specification.
  /// This status may not be used for target device execution failure,
  /// which should be handled internally in libomptarget.
  OMP_TGT_FAIL = ~0
};

/// Data attributes for each data reference used in an OpenMP target region.
52330d8983SJohannes Doerfert enum tgt_map_type { 53330d8983SJohannes Doerfert // No flags 54330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_NONE = 0x000, 55330d8983SJohannes Doerfert // copy data from host to device 56330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_TO = 0x001, 57330d8983SJohannes Doerfert // copy data from device to host 58330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_FROM = 0x002, 59330d8983SJohannes Doerfert // copy regardless of the reference count 60330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_ALWAYS = 0x004, 61330d8983SJohannes Doerfert // force unmapping of data 62330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_DELETE = 0x008, 63330d8983SJohannes Doerfert // map the pointer as well as the pointee 64330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_PTR_AND_OBJ = 0x010, 65330d8983SJohannes Doerfert // pass device base address to kernel 66330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_TARGET_PARAM = 0x020, 67330d8983SJohannes Doerfert // return base device address of mapped data 68330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_RETURN_PARAM = 0x040, 69330d8983SJohannes Doerfert // private variable - not mapped 70330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_PRIVATE = 0x080, 71330d8983SJohannes Doerfert // copy by value - not mapped 72330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_LITERAL = 0x100, 73330d8983SJohannes Doerfert // mapping is implicit 74330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_IMPLICIT = 0x200, 75330d8983SJohannes Doerfert // copy data to device 76330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_CLOSE = 0x400, 77330d8983SJohannes Doerfert // runtime error if not already allocated 78330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_PRESENT = 0x1000, 79330d8983SJohannes Doerfert // use a separate reference counter so that the data cannot be unmapped within 80330d8983SJohannes Doerfert // the structured region 81330d8983SJohannes Doerfert // This is an OpenMP extension for the sake of OpenACC support. 
82330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_OMPX_HOLD = 0x2000, 83330d8983SJohannes Doerfert // descriptor for non-contiguous target-update 84330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000, 85330d8983SJohannes Doerfert // member of struct, member given by [16 MSBs] - 1 86330d8983SJohannes Doerfert OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000 87330d8983SJohannes Doerfert }; 88330d8983SJohannes Doerfert 89330d8983SJohannes Doerfert /// Flags for offload entries. 90330d8983SJohannes Doerfert enum OpenMPOffloadingDeclareTargetFlags { 91330d8983SJohannes Doerfert /// Mark the entry global as having a 'link' attribute. 92330d8983SJohannes Doerfert OMP_DECLARE_TARGET_LINK = 0x01, 93330d8983SJohannes Doerfert /// Mark the entry global as being an indirectly callable function. 94330d8983SJohannes Doerfert OMP_DECLARE_TARGET_INDIRECT = 0x08, 95330d8983SJohannes Doerfert /// This is an entry corresponding to a requirement to be registered. 96330d8983SJohannes Doerfert OMP_REGISTER_REQUIRES = 0x10, 97330d8983SJohannes Doerfert }; 98330d8983SJohannes Doerfert 99330d8983SJohannes Doerfert enum TargetAllocTy : int32_t { 100330d8983SJohannes Doerfert TARGET_ALLOC_DEVICE = 0, 101330d8983SJohannes Doerfert TARGET_ALLOC_HOST, 102330d8983SJohannes Doerfert TARGET_ALLOC_SHARED, 103330d8983SJohannes Doerfert TARGET_ALLOC_DEFAULT, 104330d8983SJohannes Doerfert /// The allocation will not block on other streams. 
105330d8983SJohannes Doerfert TARGET_ALLOC_DEVICE_NON_BLOCKING, 106330d8983SJohannes Doerfert }; 107330d8983SJohannes Doerfert 108330d8983SJohannes Doerfert inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr, 109330d8983SJohannes Doerfert nullptr, nullptr, nullptr, nullptr, 110*80525dfcSJohannes Doerfert 0, {0,0,0}, {1, 0, 0}, {1, 0, 0}, 0}; 111330d8983SJohannes Doerfert 112330d8983SJohannes Doerfert struct DeviceTy; 113330d8983SJohannes Doerfert 114330d8983SJohannes Doerfert /// The libomptarget wrapper around a __tgt_async_info object directly 115330d8983SJohannes Doerfert /// associated with a libomptarget layer device. RAII semantics to avoid 116330d8983SJohannes Doerfert /// mistakes. 117330d8983SJohannes Doerfert class AsyncInfoTy { 118330d8983SJohannes Doerfert public: 119330d8983SJohannes Doerfert enum class SyncTy { BLOCKING, NON_BLOCKING }; 120330d8983SJohannes Doerfert 121330d8983SJohannes Doerfert private: 122330d8983SJohannes Doerfert /// Locations we used in (potentially) asynchronous calls which should live 123330d8983SJohannes Doerfert /// as long as this AsyncInfoTy object. 124330d8983SJohannes Doerfert std::deque<void *> BufferLocations; 125330d8983SJohannes Doerfert 126330d8983SJohannes Doerfert /// Post-processing operations executed after a successful synchronization. 127330d8983SJohannes Doerfert /// \note the post-processing function should return OFFLOAD_SUCCESS or 128330d8983SJohannes Doerfert /// OFFLOAD_FAIL appropriately. 129330d8983SJohannes Doerfert using PostProcFuncTy = std::function<int()>; 130330d8983SJohannes Doerfert llvm::SmallVector<PostProcFuncTy> PostProcessingFunctions; 131330d8983SJohannes Doerfert 132330d8983SJohannes Doerfert __tgt_async_info AsyncInfo; 133330d8983SJohannes Doerfert DeviceTy &Device; 134330d8983SJohannes Doerfert 135330d8983SJohannes Doerfert public: 136330d8983SJohannes Doerfert /// Synchronization method to be used. 
137330d8983SJohannes Doerfert SyncTy SyncType; 138330d8983SJohannes Doerfert 139330d8983SJohannes Doerfert AsyncInfoTy(DeviceTy &Device, SyncTy SyncType = SyncTy::BLOCKING) 140330d8983SJohannes Doerfert : Device(Device), SyncType(SyncType) {} 141330d8983SJohannes Doerfert ~AsyncInfoTy() { synchronize(); } 142330d8983SJohannes Doerfert 143330d8983SJohannes Doerfert /// Implicit conversion to the __tgt_async_info which is used in the 144330d8983SJohannes Doerfert /// plugin interface. 145330d8983SJohannes Doerfert operator __tgt_async_info *() { return &AsyncInfo; } 146330d8983SJohannes Doerfert 147330d8983SJohannes Doerfert /// Synchronize all pending actions. 148330d8983SJohannes Doerfert /// 149330d8983SJohannes Doerfert /// \note synchronization will be performance in a blocking or non-blocking 150330d8983SJohannes Doerfert /// manner, depending on the SyncType. 151330d8983SJohannes Doerfert /// 152330d8983SJohannes Doerfert /// \note if the operations are completed, the registered post-processing 153330d8983SJohannes Doerfert /// functions will be executed once and unregistered afterwards. 154330d8983SJohannes Doerfert /// 155330d8983SJohannes Doerfert /// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately. 156330d8983SJohannes Doerfert int synchronize(); 157330d8983SJohannes Doerfert 158330d8983SJohannes Doerfert /// Return a void* reference with a lifetime that is at least as long as this 159330d8983SJohannes Doerfert /// AsyncInfoTy object. The location can be used as intermediate buffer. 160330d8983SJohannes Doerfert void *&getVoidPtrLocation(); 161330d8983SJohannes Doerfert 162330d8983SJohannes Doerfert /// Check if all asynchronous operations are completed. 163330d8983SJohannes Doerfert /// 164330d8983SJohannes Doerfert /// \note only a lightweight check. If needed, use synchronize() to query the 165330d8983SJohannes Doerfert /// status of AsyncInfo before checking. 
166330d8983SJohannes Doerfert /// 167330d8983SJohannes Doerfert /// \returns true if there is no pending asynchronous operations, false 168330d8983SJohannes Doerfert /// otherwise. 169330d8983SJohannes Doerfert bool isDone() const; 170330d8983SJohannes Doerfert 171330d8983SJohannes Doerfert /// Add a new post-processing function to be executed after synchronization. 172330d8983SJohannes Doerfert /// 173330d8983SJohannes Doerfert /// \param[in] Function is a templated function (e.g., function pointers, 174330d8983SJohannes Doerfert /// lambdas, std::function) that can be convertible to a PostProcFuncTy (i.e., 175330d8983SJohannes Doerfert /// it must have int() as its function signature). 176330d8983SJohannes Doerfert template <typename FuncTy> void addPostProcessingFunction(FuncTy &&Function) { 177330d8983SJohannes Doerfert static_assert(std::is_convertible_v<FuncTy, PostProcFuncTy>, 178330d8983SJohannes Doerfert "Invalid post-processing function type. Please check " 179330d8983SJohannes Doerfert "function signature!"); 180330d8983SJohannes Doerfert PostProcessingFunctions.emplace_back(Function); 181330d8983SJohannes Doerfert } 182330d8983SJohannes Doerfert 183330d8983SJohannes Doerfert private: 184330d8983SJohannes Doerfert /// Run all the post-processing functions sequentially. 185330d8983SJohannes Doerfert /// 186330d8983SJohannes Doerfert /// \note after a successful execution, all previously registered functions 187330d8983SJohannes Doerfert /// are unregistered. 188330d8983SJohannes Doerfert /// 189330d8983SJohannes Doerfert /// \returns OFFLOAD_FAIL if any post-processing function failed, 190330d8983SJohannes Doerfert /// OFFLOAD_SUCCESS otherwise. 191330d8983SJohannes Doerfert int32_t runPostProcessing(); 192330d8983SJohannes Doerfert 193330d8983SJohannes Doerfert /// Check if the internal asynchronous info queue is empty or not. 194330d8983SJohannes Doerfert /// 195330d8983SJohannes Doerfert /// \returns true if empty, false otherwise. 
196330d8983SJohannes Doerfert bool isQueueEmpty() const; 197330d8983SJohannes Doerfert }; 198330d8983SJohannes Doerfert 199330d8983SJohannes Doerfert // Wrapper for task stored async info objects. 200330d8983SJohannes Doerfert class TaskAsyncInfoWrapperTy { 201330d8983SJohannes Doerfert // Invalid GTID as defined by libomp; keep in sync 202330d8983SJohannes Doerfert static constexpr int KMP_GTID_DNE = -2; 203330d8983SJohannes Doerfert 204330d8983SJohannes Doerfert const int ExecThreadID = KMP_GTID_DNE; 205330d8983SJohannes Doerfert AsyncInfoTy LocalAsyncInfo; 206330d8983SJohannes Doerfert AsyncInfoTy *AsyncInfo = &LocalAsyncInfo; 207330d8983SJohannes Doerfert void **TaskAsyncInfoPtr = nullptr; 208330d8983SJohannes Doerfert 209330d8983SJohannes Doerfert public: 210330d8983SJohannes Doerfert TaskAsyncInfoWrapperTy(DeviceTy &Device) 211330d8983SJohannes Doerfert : ExecThreadID(__kmpc_global_thread_num(NULL)), LocalAsyncInfo(Device) { 212330d8983SJohannes Doerfert // If we failed to acquired the current global thread id, we cannot 213330d8983SJohannes Doerfert // re-enqueue the current task. Thus we should use the local blocking async 214330d8983SJohannes Doerfert // info. 215330d8983SJohannes Doerfert if (ExecThreadID == KMP_GTID_DNE) 216330d8983SJohannes Doerfert return; 217330d8983SJohannes Doerfert 218330d8983SJohannes Doerfert // Only tasks with an assigned task team can be re-enqueue and thus can 219330d8983SJohannes Doerfert // use the non-blocking synchronization scheme. Thus we should use the local 220330d8983SJohannes Doerfert // blocking async info, if we don´t have one. 221330d8983SJohannes Doerfert if (!__kmpc_omp_has_task_team(ExecThreadID)) 222330d8983SJohannes Doerfert return; 223330d8983SJohannes Doerfert 224330d8983SJohannes Doerfert // Acquire a pointer to the AsyncInfo stored inside the current task being 225330d8983SJohannes Doerfert // executed. 
226330d8983SJohannes Doerfert TaskAsyncInfoPtr = __kmpc_omp_get_target_async_handle_ptr(ExecThreadID); 227330d8983SJohannes Doerfert 228330d8983SJohannes Doerfert // If we cannot acquire such pointer, fallback to using the local blocking 229330d8983SJohannes Doerfert // async info. 230330d8983SJohannes Doerfert if (!TaskAsyncInfoPtr) 231330d8983SJohannes Doerfert return; 232330d8983SJohannes Doerfert 233330d8983SJohannes Doerfert // When creating a new task async info, the task handle must always be 234330d8983SJohannes Doerfert // invalid. We must never overwrite any task async handle and there should 235330d8983SJohannes Doerfert // never be any valid handle store inside the task at this point. 236330d8983SJohannes Doerfert assert((*TaskAsyncInfoPtr) == nullptr && 237330d8983SJohannes Doerfert "Task async handle is not empty when dispatching new device " 238330d8983SJohannes Doerfert "operations. The handle was not cleared properly or " 239330d8983SJohannes Doerfert "__tgt_target_nowait_query should have been called!"); 240330d8983SJohannes Doerfert 241330d8983SJohannes Doerfert // If no valid async handle is present, a new AsyncInfo will be allocated 242330d8983SJohannes Doerfert // and stored in the current task. 243330d8983SJohannes Doerfert AsyncInfo = new AsyncInfoTy(Device, AsyncInfoTy::SyncTy::NON_BLOCKING); 244330d8983SJohannes Doerfert *TaskAsyncInfoPtr = (void *)AsyncInfo; 245330d8983SJohannes Doerfert } 246330d8983SJohannes Doerfert 247330d8983SJohannes Doerfert ~TaskAsyncInfoWrapperTy() { 248330d8983SJohannes Doerfert // Local async info destruction is automatically handled by ~AsyncInfoTy. 249330d8983SJohannes Doerfert if (AsyncInfo == &LocalAsyncInfo) 250330d8983SJohannes Doerfert return; 251330d8983SJohannes Doerfert 252330d8983SJohannes Doerfert // If the are device operations still pending, return immediately without 253330d8983SJohannes Doerfert // deallocating the handle. 
254330d8983SJohannes Doerfert if (!AsyncInfo->isDone()) 255330d8983SJohannes Doerfert return; 256330d8983SJohannes Doerfert 257330d8983SJohannes Doerfert // Delete the handle and unset it from the OpenMP task data. 258330d8983SJohannes Doerfert delete AsyncInfo; 259330d8983SJohannes Doerfert *TaskAsyncInfoPtr = nullptr; 260330d8983SJohannes Doerfert } 261330d8983SJohannes Doerfert 262330d8983SJohannes Doerfert operator AsyncInfoTy &() { return *AsyncInfo; } 263330d8983SJohannes Doerfert }; 264330d8983SJohannes Doerfert 265330d8983SJohannes Doerfert /// This struct is a record of non-contiguous information 266330d8983SJohannes Doerfert struct __tgt_target_non_contig { 267330d8983SJohannes Doerfert uint64_t Offset; 268330d8983SJohannes Doerfert uint64_t Count; 269330d8983SJohannes Doerfert uint64_t Stride; 270330d8983SJohannes Doerfert }; 271330d8983SJohannes Doerfert 272330d8983SJohannes Doerfert #ifdef __cplusplus 273330d8983SJohannes Doerfert extern "C" { 274330d8983SJohannes Doerfert #endif 275330d8983SJohannes Doerfert 276330d8983SJohannes Doerfert void ompx_dump_mapping_tables(void); 277330d8983SJohannes Doerfert int omp_get_num_devices(void); 278330d8983SJohannes Doerfert int omp_get_device_num(void); 279330d8983SJohannes Doerfert int omp_get_initial_device(void); 280330d8983SJohannes Doerfert void *omp_target_alloc(size_t Size, int DeviceNum); 281330d8983SJohannes Doerfert void omp_target_free(void *DevicePtr, int DeviceNum); 282330d8983SJohannes Doerfert int omp_target_is_present(const void *Ptr, int DeviceNum); 283330d8983SJohannes Doerfert int omp_target_memcpy(void *Dst, const void *Src, size_t Length, 284330d8983SJohannes Doerfert size_t DstOffset, size_t SrcOffset, int DstDevice, 285330d8983SJohannes Doerfert int SrcDevice); 286330d8983SJohannes Doerfert int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, 287330d8983SJohannes Doerfert int NumDims, const size_t *Volume, 288330d8983SJohannes Doerfert const size_t *DstOffsets, const 
size_t *SrcOffsets, 289330d8983SJohannes Doerfert const size_t *DstDimensions, 290330d8983SJohannes Doerfert const size_t *SrcDimensions, int DstDevice, 291330d8983SJohannes Doerfert int SrcDevice); 292330d8983SJohannes Doerfert void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum); 293330d8983SJohannes Doerfert int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, 294330d8983SJohannes Doerfert size_t Size, size_t DeviceOffset, int DeviceNum); 295330d8983SJohannes Doerfert int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum); 296330d8983SJohannes Doerfert 297330d8983SJohannes Doerfert /// Explicit target memory allocators 298330d8983SJohannes Doerfert /// Using the llvm_ prefix until they become part of the OpenMP standard. 299330d8983SJohannes Doerfert void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum); 300330d8983SJohannes Doerfert void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum); 301330d8983SJohannes Doerfert void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum); 302330d8983SJohannes Doerfert 303330d8983SJohannes Doerfert /// Explicit target memory deallocators 304330d8983SJohannes Doerfert /// Using the llvm_ prefix until they become part of the OpenMP standard. 305330d8983SJohannes Doerfert void llvm_omp_target_free_device(void *DevicePtr, int DeviceNum); 306330d8983SJohannes Doerfert void llvm_omp_target_free_host(void *DevicePtr, int DeviceNum); 307330d8983SJohannes Doerfert void llvm_omp_target_free_shared(void *DevicePtr, int DeviceNum); 308330d8983SJohannes Doerfert 309330d8983SJohannes Doerfert /// Dummy target so we have a symbol for generating host fallback. 
310330d8983SJohannes Doerfert void *llvm_omp_target_dynamic_shared_alloc(); 311330d8983SJohannes Doerfert 312330d8983SJohannes Doerfert /// add the clauses of the requires directives in a given file 313330d8983SJohannes Doerfert void __tgt_register_requires(int64_t Flags); 314330d8983SJohannes Doerfert 315330d8983SJohannes Doerfert /// Initializes the runtime library. 316330d8983SJohannes Doerfert void __tgt_rtl_init(); 317330d8983SJohannes Doerfert 318330d8983SJohannes Doerfert /// Deinitializes the runtime library. 319330d8983SJohannes Doerfert void __tgt_rtl_deinit(); 320330d8983SJohannes Doerfert 321330d8983SJohannes Doerfert /// adds a target shared library to the target execution image 322330d8983SJohannes Doerfert void __tgt_register_lib(__tgt_bin_desc *Desc); 323330d8983SJohannes Doerfert 324330d8983SJohannes Doerfert /// Initialize all RTLs at once 325330d8983SJohannes Doerfert void __tgt_init_all_rtls(); 326330d8983SJohannes Doerfert 327330d8983SJohannes Doerfert /// removes a target shared library from the target execution image 328330d8983SJohannes Doerfert void __tgt_unregister_lib(__tgt_bin_desc *Desc); 329330d8983SJohannes Doerfert 330330d8983SJohannes Doerfert // creates the host to target data mapping, stores it in the 331330d8983SJohannes Doerfert // libomptarget.so internal structure (an entry in a stack of data maps) and 332330d8983SJohannes Doerfert // passes the data to the device; 333330d8983SJohannes Doerfert void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 334330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, int64_t *ArgTypes); 335330d8983SJohannes Doerfert void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum, 336330d8983SJohannes Doerfert void **ArgsBase, void **Args, 337330d8983SJohannes Doerfert int64_t *ArgSizes, int64_t *ArgTypes, 338330d8983SJohannes Doerfert int32_t DepNum, void *DepList, 339330d8983SJohannes Doerfert int32_t NoAliasDepNum, 340330d8983SJohannes Doerfert void 
*NoAliasDepList); 341330d8983SJohannes Doerfert void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, 342330d8983SJohannes Doerfert int32_t ArgNum, void **ArgsBase, 343330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, 344330d8983SJohannes Doerfert int64_t *ArgTypes, map_var_info_t *ArgNames, 345330d8983SJohannes Doerfert void **ArgMappers); 346330d8983SJohannes Doerfert void __tgt_target_data_begin_nowait_mapper( 347330d8983SJohannes Doerfert ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 348330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 349330d8983SJohannes Doerfert void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 350330d8983SJohannes Doerfert void *NoAliasDepList); 351330d8983SJohannes Doerfert 352330d8983SJohannes Doerfert // passes data from the target, release target memory and destroys the 353330d8983SJohannes Doerfert // host-target mapping (top entry from the stack of data maps) created by 354330d8983SJohannes Doerfert // the last __tgt_target_data_begin 355330d8983SJohannes Doerfert void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 356330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, int64_t *ArgTypes); 357330d8983SJohannes Doerfert void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum, 358330d8983SJohannes Doerfert void **ArgsBase, void **Args, 359330d8983SJohannes Doerfert int64_t *ArgSizes, int64_t *ArgTypes, 360330d8983SJohannes Doerfert int32_t DepNum, void *DepList, 361330d8983SJohannes Doerfert int32_t NoAliasDepNum, void *NoAliasDepList); 362330d8983SJohannes Doerfert void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, 363330d8983SJohannes Doerfert int32_t ArgNum, void **ArgsBase, void **Args, 364330d8983SJohannes Doerfert int64_t *ArgSizes, int64_t *ArgTypes, 365330d8983SJohannes Doerfert map_var_info_t *ArgNames, void **ArgMappers); 366330d8983SJohannes Doerfert 
void __tgt_target_data_end_nowait_mapper( 367330d8983SJohannes Doerfert ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 368330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 369330d8983SJohannes Doerfert void **ArgMappers, int32_t depNum, void *depList, int32_t NoAliasDepNum, 370330d8983SJohannes Doerfert void *NoAliasDepList); 371330d8983SJohannes Doerfert 372330d8983SJohannes Doerfert /// passes data to/from the target 373330d8983SJohannes Doerfert void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 374330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, 375330d8983SJohannes Doerfert int64_t *ArgTypes); 376330d8983SJohannes Doerfert void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum, 377330d8983SJohannes Doerfert void **ArgsBase, void **Args, 378330d8983SJohannes Doerfert int64_t *ArgSizes, int64_t *ArgTypes, 379330d8983SJohannes Doerfert int32_t DepNum, void *DepList, 380330d8983SJohannes Doerfert int32_t NoAliasDepNum, 381330d8983SJohannes Doerfert void *NoAliasDepList); 382330d8983SJohannes Doerfert void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, 383330d8983SJohannes Doerfert int32_t ArgNum, void **ArgsBase, 384330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, 385330d8983SJohannes Doerfert int64_t *ArgTypes, 386330d8983SJohannes Doerfert map_var_info_t *ArgNames, 387330d8983SJohannes Doerfert void **ArgMappers); 388330d8983SJohannes Doerfert void __tgt_target_data_update_nowait_mapper( 389330d8983SJohannes Doerfert ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 390330d8983SJohannes Doerfert void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 391330d8983SJohannes Doerfert void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 392330d8983SJohannes Doerfert void *NoAliasDepList); 393330d8983SJohannes Doerfert 394330d8983SJohannes Doerfert // Performs 
the same actions as data_begin in case ArgNum is non-zero 395330d8983SJohannes Doerfert // and initiates run of offloaded region on target platform; if ArgNum 396330d8983SJohannes Doerfert // is non-zero after the region execution is done it also performs the 397330d8983SJohannes Doerfert // same action as data_end above. The following types are used; this 398330d8983SJohannes Doerfert // function returns 0 if it was able to transfer the execution to a 399330d8983SJohannes Doerfert // target and an int different from zero otherwise. 400330d8983SJohannes Doerfert int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, 401330d8983SJohannes Doerfert int32_t ThreadLimit, void *HostPtr, KernelArgsTy *Args); 402330d8983SJohannes Doerfert 403330d8983SJohannes Doerfert // Non-blocking synchronization for target nowait regions. This function 404330d8983SJohannes Doerfert // acquires the asynchronous context from task data of the current task being 405330d8983SJohannes Doerfert // executed and tries to query for the completion of its operations. If the 406330d8983SJohannes Doerfert // operations are still pending, the function returns immediately. If the 407330d8983SJohannes Doerfert // operations are completed, all the post-processing procedures stored in the 408330d8983SJohannes Doerfert // asynchronous context are executed and the context is removed from the task 409330d8983SJohannes Doerfert // data. 410330d8983SJohannes Doerfert void __tgt_target_nowait_query(void **AsyncHandle); 411330d8983SJohannes Doerfert 412330d8983SJohannes Doerfert /// Executes a target kernel by replaying recorded kernel arguments and 413330d8983SJohannes Doerfert /// device memory. 
414330d8983SJohannes Doerfert int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, void *HostPtr, 415330d8983SJohannes Doerfert void *DeviceMemory, int64_t DeviceMemorySize, 416330d8983SJohannes Doerfert void **TgtArgs, ptrdiff_t *TgtOffsets, 417330d8983SJohannes Doerfert int32_t NumArgs, int32_t NumTeams, 418330d8983SJohannes Doerfert int32_t ThreadLimit, uint64_t LoopTripCount); 419330d8983SJohannes Doerfert 420330d8983SJohannes Doerfert void __tgt_set_info_flag(uint32_t); 421330d8983SJohannes Doerfert 422330d8983SJohannes Doerfert int __tgt_print_device_info(int64_t DeviceId); 423330d8983SJohannes Doerfert 424330d8983SJohannes Doerfert int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize, 425330d8983SJohannes Doerfert void *VAddr, bool IsRecord, bool SaveOutput, 426330d8983SJohannes Doerfert uint64_t &ReqPtrArgOffset); 427330d8983SJohannes Doerfert 428330d8983SJohannes Doerfert #ifdef __cplusplus 429330d8983SJohannes Doerfert } 430330d8983SJohannes Doerfert #endif 431330d8983SJohannes Doerfert 432330d8983SJohannes Doerfert #ifdef __cplusplus 433330d8983SJohannes Doerfert #define EXTERN extern "C" 434330d8983SJohannes Doerfert #else 435330d8983SJohannes Doerfert #define EXTERN extern 436330d8983SJohannes Doerfert #endif 437330d8983SJohannes Doerfert 438330d8983SJohannes Doerfert #endif // _OMPTARGET_H_ 439