//===-------- interface.cpp - Target independent OpenMP target RTL --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//

#include "OpenMP/OMPT/Interface.h"
#include "OffloadPolicy.h"
#include "OpenMP/OMPT/Callback.h"
#include "OpenMP/omp.h"
#include "PluginManager.h"
#include "omptarget.h"
#include "private.h"

#include "Shared/EnvironmentVar.h"
#include "Shared/Profile.h"

#include "Utils/ExponentialBackoff.h"

#include "llvm/Frontend/OpenMP/OMPConstants.h"

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

#ifdef OMPT_SUPPORT
using namespace llvm::omp::target::ompt;
#endif

// If offload is enabled, ensure that device DeviceID has been initialized.
//
// The return bool indicates if the offload is to the host device.
// There are three possible results:
// - Return false if the target device is ready for offload.
// - Return true without reporting a runtime error if offload is
//   disabled, perhaps because the initial device was specified.
// - Report a runtime error and return true.
//
// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device.
// This step might be skipped if offload is disabled.
49 bool checkDevice(int64_t &DeviceID, ident_t *Loc) { 50 if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) { 51 DP("Offload is disabled\n"); 52 return true; 53 } 54 55 if (DeviceID == OFFLOAD_DEVICE_DEFAULT) { 56 DeviceID = omp_get_default_device(); 57 DP("Use default device id %" PRId64 "\n", DeviceID); 58 } 59 60 // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669. 61 if (omp_get_num_devices() == 0) { 62 DP("omp_get_num_devices() == 0 but offload is manadatory\n"); 63 handleTargetOutcome(false, Loc); 64 return true; 65 } 66 67 if (DeviceID == omp_get_initial_device()) { 68 DP("Device is host (%" PRId64 "), returning as if offload is disabled\n", 69 DeviceID); 70 return true; 71 } 72 return false; 73 } 74 75 //////////////////////////////////////////////////////////////////////////////// 76 /// adds requires flags 77 EXTERN void __tgt_register_requires(int64_t Flags) { 78 MESSAGE("The %s function has been removed. Old OpenMP requirements will not " 79 "be handled", 80 __PRETTY_FUNCTION__); 81 } 82 83 EXTERN void __tgt_rtl_init() { initRuntime(); } 84 EXTERN void __tgt_rtl_deinit() { deinitRuntime(); } 85 86 //////////////////////////////////////////////////////////////////////////////// 87 /// adds a target shared library to the target execution image 88 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { 89 initRuntime(); 90 if (PM->delayRegisterLib(Desc)) 91 return; 92 93 PM->registerLib(Desc); 94 } 95 96 //////////////////////////////////////////////////////////////////////////////// 97 /// Initialize all available devices without registering any image 98 EXTERN void __tgt_init_all_rtls() { 99 assert(PM && "Runtime not initialized"); 100 PM->initializeAllDevices(); 101 } 102 103 //////////////////////////////////////////////////////////////////////////////// 104 /// unloads a target shared library 105 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { 106 PM->unregisterLib(Desc); 107 108 deinitRuntime(); 109 } 110 111 
/// Common implementation behind all `__tgt_target_data_*` entry points.
///
/// Validates that \p DeviceId is ready for offload (resolving the default
/// device via checkDevice), optionally prints the mapping arguments, then
/// invokes \p TargetDataFunction (one of targetDataBegin / targetDataEnd /
/// targetDataUpdate) through an async-info object of type
/// \p TargetAsyncInfoTy and synchronizes on success. The final outcome is
/// reported via handleTargetOutcome().
///
/// \param RegionTypeMsg Human-readable message used when printing kernel
///        arguments at the matching info level.
/// \param RegionName Short region tag ("begin"/"end"/"update") used in debug
///        output.
template <typename TargetAsyncInfoTy>
static inline void
targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
           void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
           map_var_info_t *ArgNames, void **ArgMappers,
           TargetDataFuncPtrTy TargetDataFunction, const char *RegionTypeMsg,
           const char *RegionName) {
  assert(PM && "Runtime not initialized");
  // The wrapper type must bind to an AsyncInfoTy& below.
  static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
                "TargetAsyncInfoTy must be convertible to AsyncInfoTy.");

  TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
                                   "NumArgs=" + std::to_string(ArgNum), Loc);

  DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
     RegionName, DeviceId, ArgNum);

  // Offload disabled or host device selected: nothing to transfer.
  if (checkDevice(DeviceId, Loc)) {
    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return;
  }

  if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
    printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
                         RegionTypeMsg);
#ifdef OMPTARGET_DEBUG
  // Dump each mapping entry when debugging is compiled in.
  for (int I = 0; I < ArgNum; ++I) {
    DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
       ", Type=0x%" PRIx64 ", Name=%s\n",
       I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
       (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
  }
#endif

  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
  AsyncInfoTy &AsyncInfo = TargetAsyncInfo;

  /// RAII to establish tool anchors before and after data begin / end / update
  OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
                        TargetDataFunction == targetDataEnd ||
                        TargetDataFunction == targetDataUpdate) &&
                       "Encountered unexpected TargetDataFunction during "
                       "execution of targetData");
                auto CallbackFunctions =
                    (TargetDataFunction == targetDataBegin)
                        ? RegionInterface.getCallbacks<ompt_target_enter_data>()
                    : (TargetDataFunction == targetDataEnd)
                        ? RegionInterface.getCallbacks<ompt_target_exit_data>()
                        : RegionInterface.getCallbacks<ompt_target_update>();
                InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
                                             OMPT_GET_RETURN_ADDRESS);)

  int Rc = OFFLOAD_SUCCESS;
  Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
                          ArgTypes, ArgNames, ArgMappers, AsyncInfo,
                          /*FromMapper=*/false);

  if (Rc == OFFLOAD_SUCCESS)
    Rc = AsyncInfo.synchronize();

  handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
}

/// creates host-to-target data mapping, stores it in the
/// libomptarget.so internal structure (an entry in a stack of data maps)
/// and passes the data to the device.
181 EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, 182 int32_t ArgNum, void **ArgsBase, 183 void **Args, int64_t *ArgSizes, 184 int64_t *ArgTypes, 185 map_var_info_t *ArgNames, 186 void **ArgMappers) { 187 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); 188 targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, 189 ArgTypes, ArgNames, ArgMappers, targetDataBegin, 190 "Entering OpenMP data region with being_mapper", 191 "begin"); 192 } 193 194 EXTERN void __tgt_target_data_begin_nowait_mapper( 195 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, 196 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, 197 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, 198 void *NoAliasDepList) { 199 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); 200 targetData<TaskAsyncInfoWrapperTy>( 201 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, 202 ArgMappers, targetDataBegin, 203 "Entering OpenMP data region with being_nowait_mapper", "begin"); 204 } 205 206 /// passes data from the target, releases target memory and destroys 207 /// the host-target mapping (top entry from the stack of data maps) 208 /// created by the last __tgt_target_data_begin. 
EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
                                         int32_t ArgNum, void **ArgsBase,
                                         void **Args, int64_t *ArgSizes,
                                         int64_t *ArgTypes,
                                         map_var_info_t *ArgNames,
                                         void **ArgMappers) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  // Synchronous variant: blocks until device-to-host copies are done.
  targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
                          ArgTypes, ArgNames, ArgMappers, targetDataEnd,
                          "Exiting OpenMP data region with end_mapper", "end");
}

/// `nowait` variant of __tgt_target_data_end_mapper; dependences are handled
/// by the task wrapper.
EXTERN void __tgt_target_data_end_nowait_mapper(
    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
    void *NoAliasDepList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<TaskAsyncInfoWrapperTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataEnd,
      "Exiting OpenMP data region with end_nowait_mapper", "end");
}

/// Moves data between an existing device mapping and the host (`target
/// update` construct).
EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
                                            int32_t ArgNum, void **ArgsBase,
                                            void **Args, int64_t *ArgSizes,
                                            int64_t *ArgTypes,
                                            map_var_info_t *ArgNames,
                                            void **ArgMappers) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<AsyncInfoTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataUpdate,
      "Updating data within the OpenMP data region with update_mapper",
      "update");
}

/// `nowait` variant of __tgt_target_data_update_mapper.
EXTERN void __tgt_target_data_update_nowait_mapper(
    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
    void *NoAliasDepList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<TaskAsyncInfoWrapperTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataUpdate,
      "Updating data within the OpenMP data region with update_nowait_mapper",
      "update");
}

/// Upgrade \p KernelArgs to a supported kernel-argument ABI version.
///
/// If the incoming version is older than OMP_KERNEL_ARG_VERSION, the fields
/// are copied into \p LocalKernelArgs with the upgraded version number and
/// the launch bounds seeded from \p NumTeams / \p ThreadLimit, and a pointer
/// to the local copy is returned. Otherwise the original struct is returned
/// (after normalizing zero-valued trailing launch dimensions to 1).
static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs,
                                       KernelArgsTy &LocalKernelArgs,
                                       int32_t NumTeams, int32_t ThreadLimit) {
  if (KernelArgs->Version > OMP_KERNEL_ARG_VERSION)
    DP("Unexpected ABI version: %u\n", KernelArgs->Version);

  uint32_t UpgradedVersion = KernelArgs->Version;
  if (KernelArgs->Version < OMP_KERNEL_ARG_VERSION) {
    // The upgraded version will be based on the kernel launch environment.
    if (KernelArgs->Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
      UpgradedVersion = OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR - 1;
    else
      UpgradedVersion = OMP_KERNEL_ARG_VERSION;
  }
  if (UpgradedVersion != KernelArgs->Version) {
    // Copy all fields; the multi-dimensional launch bounds collapse to the
    // scalar NumTeams/ThreadLimit passed by the old-ABI caller.
    LocalKernelArgs.Version = UpgradedVersion;
    LocalKernelArgs.NumArgs = KernelArgs->NumArgs;
    LocalKernelArgs.ArgBasePtrs = KernelArgs->ArgBasePtrs;
    LocalKernelArgs.ArgPtrs = KernelArgs->ArgPtrs;
    LocalKernelArgs.ArgSizes = KernelArgs->ArgSizes;
    LocalKernelArgs.ArgTypes = KernelArgs->ArgTypes;
    LocalKernelArgs.ArgNames = KernelArgs->ArgNames;
    LocalKernelArgs.ArgMappers = KernelArgs->ArgMappers;
    LocalKernelArgs.Tripcount = KernelArgs->Tripcount;
    LocalKernelArgs.Flags = KernelArgs->Flags;
    LocalKernelArgs.DynCGroupMem = 0;
    LocalKernelArgs.NumTeams[0] = NumTeams;
    LocalKernelArgs.NumTeams[1] = 1;
    LocalKernelArgs.NumTeams[2] = 1;
    LocalKernelArgs.ThreadLimit[0] = ThreadLimit;
    LocalKernelArgs.ThreadLimit[1] = 1;
    LocalKernelArgs.ThreadLimit[2] = 1;
    return &LocalKernelArgs;
  }

  // FIXME: This is a WA to "calibrate" the bad work done in the front end.
  // Delete this ugly code after the front end emits proper values.
  auto CorrectMultiDim = [](uint32_t(&Val)[3]) {
    if (Val[1] == 0)
      Val[1] = 1;
    if (Val[2] == 0)
      Val[2] = 1;
  };
  CorrectMultiDim(KernelArgs->ThreadLimit);
  CorrectMultiDim(KernelArgs->NumTeams);

  return KernelArgs;
}

/// Common implementation behind __tgt_target_kernel.
///
/// Checks the device, normalizes/upgrades the kernel arguments, then launches
/// the kernel through target() using an async-info object of type
/// \p TargetAsyncInfoTy, synchronizing on success.
/// \return OMP_TGT_SUCCESS, or OMP_TGT_FAIL if the device is not offloaded to.
template <typename TargetAsyncInfoTy>
static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
                               int32_t ThreadLimit, void *HostPtr,
                               KernelArgsTy *KernelArgs) {
  assert(PM && "Runtime not initialized");
  static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
                "Target AsyncInfoTy must be convertible to AsyncInfoTy.");
  DP("Entering target region for device %" PRId64 " with entry point " DPxMOD
     "\n",
     DeviceId, DPxPTR(HostPtr));

  if (checkDevice(DeviceId, Loc)) {
    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return OMP_TGT_FAIL;
  }

  // NumTeams == -1 marks a non-teams region; launch it with a single team.
  bool IsTeams = NumTeams != -1;
  if (!IsTeams)
    KernelArgs->NumTeams[0] = NumTeams = 1;

  // Auto-upgrade kernel args version 1 to 2.
  KernelArgsTy LocalKernelArgs;
  KernelArgs =
      upgradeKernelArgs(KernelArgs, LocalKernelArgs, NumTeams, ThreadLimit);

  TIMESCOPE_WITH_DETAILS_AND_IDENT(
      "Runtime: target exe",
      "NumTeams=" + std::to_string(NumTeams) +
          ";NumArgs=" + std::to_string(KernelArgs->NumArgs),
      Loc);

  if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
    printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs,
                         KernelArgs->ArgSizes, KernelArgs->ArgTypes,
                         KernelArgs->ArgNames, "Entering OpenMP kernel");
#ifdef OMPTARGET_DEBUG
  // Dump each kernel argument when debugging is compiled in.
  for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
    DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
       ", Type=0x%" PRIx64 ", Name=%s\n",
       I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]),
       KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I],
       (KernelArgs->ArgNames)
           ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
           : "unknown");
  }
#endif

  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
  AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
  /// RAII to establish tool anchors before and after target region
  OMPT_IF_BUILT(InterfaceRAII TargetRAII(
                    RegionInterface.getCallbacks<ompt_target>(), DeviceId,
                    /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  int Rc = OFFLOAD_SUCCESS;
  Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
  { // required to show synchronization in the profile
    TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize", "", Loc);
    if (Rc == OFFLOAD_SUCCESS)
      Rc = AsyncInfo.synchronize();

    handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
    assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
  }
  return OMP_TGT_SUCCESS;
}

/// Implements a kernel entry that executes the target region on the specified
/// device.
///
/// \param Loc Source location associated with this target region.
/// \param DeviceId The device to execute this region, -1 indicates the default.
/// \param NumTeams Number of teams to launch the region with, -1 indicates a
/// non-teams region and 0 indicates it was unspecified.
/// \param ThreadLimit Limit to the number of threads to use in the kernel
/// launch, 0 indicates it was unspecified.
/// \param HostPtr The pointer to the host function registered with the kernel.
/// \param Args All arguments to this kernel launch (see struct definition).
EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
                               int32_t ThreadLimit, void *HostPtr,
                               KernelArgsTy *KernelArgs) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  // `nowait` launches go through the task wrapper; everything else blocks.
  if (KernelArgs->Flags.NoWait)
    return targetKernel<TaskAsyncInfoWrapperTy>(
        Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs);
  return targetKernel<AsyncInfoTy>(Loc, DeviceId, NumTeams, ThreadLimit,
                                   HostPtr, KernelArgs);
}

/// Activates the record replay mechanism.
/// \param DeviceId The device identifier to execute the target region.
/// \param MemorySize The number of bytes to be (pre-)allocated
///                   by the bump allocator.
/// \param IsRecord Activates the record replay mechanism in
///                 'record' mode or 'replay' mode.
/// \param SaveOutput Store the device memory after kernel
///                   execution on persistent storage.
EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
                                        void *VAddr, bool IsRecord,
                                        bool SaveOutput,
                                        uint64_t &ReqPtrArgOffset) {
  assert(PM && "Runtime not initialized");
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  [[maybe_unused]] int Rc = target_activate_rr(
      *DeviceOrErr, MemorySize, VAddr, IsRecord, SaveOutput, ReqPtrArgOffset);
  assert(Rc == OFFLOAD_SUCCESS &&
         "__tgt_activate_record_replay unexpected failure!");
  return OMP_TGT_SUCCESS;
}

/// Implements a target kernel entry that replays a pre-recorded kernel.
/// \param Loc Source location associated with this target region (unused).
/// \param DeviceId The device identifier to execute the target region.
/// \param HostPtr A pointer to an address that uniquely identifies the kernel.
/// \param DeviceMemory A pointer to an array storing device memory data to move
/// prior to kernel execution.
/// \param DeviceMemorySize The size of the above device memory data in bytes.
/// \param TgtArgs An array of pointers of the pre-recorded target kernel
/// arguments.
/// \param TgtOffsets An array of pointers of the pre-recorded target kernel
/// argument offsets.
/// \param NumArgs The number of kernel arguments.
/// \param NumTeams Number of teams to launch the target region with.
/// \param ThreadLimit Limit to the number of threads to use in kernel
/// execution.
/// \param LoopTripCount The pre-recorded value of the loop tripcount, if any.
/// \return OMP_TGT_SUCCESS on success, OMP_TGT_FAIL on failure.
EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
                                      void *HostPtr, void *DeviceMemory,
                                      int64_t DeviceMemorySize, void **TgtArgs,
                                      ptrdiff_t *TgtOffsets, int32_t NumArgs,
                                      int32_t NumTeams, int32_t ThreadLimit,
                                      uint64_t LoopTripCount) {
  assert(PM && "Runtime not initialized");
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  // Offload disabled or host device: a replay cannot run on the host.
  if (checkDevice(DeviceId, Loc)) {
    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return OMP_TGT_FAIL;
  }
  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  /// RAII to establish tool anchors before and after target region
  OMPT_IF_BUILT(InterfaceRAII TargetRAII(
                    RegionInterface.getCallbacks<ompt_target>(), DeviceId,
                    /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  AsyncInfoTy AsyncInfo(*DeviceOrErr);
  int Rc = target_replay(Loc, *DeviceOrErr, HostPtr, DeviceMemory,
                         DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs,
                         NumTeams, ThreadLimit, LoopTripCount, AsyncInfo);
  if (Rc == OFFLOAD_SUCCESS)
    Rc = AsyncInfo.synchronize();
  handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
  assert(Rc == OFFLOAD_SUCCESS &&
         "__tgt_target_kernel_replay unexpected failure!");
  return OMP_TGT_SUCCESS;
}

// Get the current number of components for a user-defined mapper.
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
  auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
  int64_t Size = MapperComponentsPtr->Components.size();
  DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
     DPxPTR(RtMapperHandle), Size);
  return Size;
}

// Push back one component for a user-defined mapper.
EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
                                        void *Begin, int64_t Size, int64_t Type,
                                        void *Name) {
  DP("__tgt_push_mapper_component(Handle=" DPxMOD
     ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
     ", Type=0x%" PRIx64 ", Name=%s).\n",
     DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
     (Name) ? getNameFromMapping(Name).c_str() : "unknown");
  auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
  MapperComponentsPtr->Components.push_back(
      MapComponentInfoTy(Base, Begin, Size, Type, Name));
}

/// Set the runtime's info level to \p NewInfoLevel (atomic store).
EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
  assert(PM && "Runtime not initialized");
  std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
  InfoLevel.store(NewInfoLevel);
}

/// Print the plugin-provided device information for \p DeviceId.
EXTERN int __tgt_print_device_info(int64_t DeviceId) {
  assert(PM && "Runtime not initialized");
  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  return DeviceOrErr->printDeviceInfo();
}

/// Query completion of the `nowait` target operation behind \p AsyncHandle.
/// If the operation is done, the handle is destroyed and cleared; otherwise
/// returns immediately so the caller can retry.
EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
  assert(PM && "Runtime not initialized");
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));

  if (!AsyncHandle || !*AsyncHandle) {
    FATAL_MESSAGE0(
        1, "Receive an invalid async handle from the current OpenMP task. Is "
           "this a target nowait region?\n");
  }

  // Exponential backoff tries to optimally decide if a thread should just query
  // for the device operations (work/spin wait on them) or block until they are
  // completed (use device side blocking mechanism). This allows the runtime to
  // adapt itself when there are a lot of long-running target regions in-flight.
  static thread_local utils::ExponentialBackoff QueryCounter(
      Int64Envar("OMPTARGET_QUERY_COUNT_MAX", 10),
      Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD", 5),
      Envar<float>("OMPTARGET_QUERY_COUNT_BACKOFF_FACTOR", 0.5f));

  auto *AsyncInfo = (AsyncInfoTy *)*AsyncHandle;

  // If the thread is actively waiting on too many target nowait regions, we
  // should use the blocking sync type.
  if (QueryCounter.isAboveThreshold())
    AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING;

  if (AsyncInfo->synchronize())
    FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n");
  // If there are device operations still pending, return immediately without
  // deallocating the handle and increase the current thread query count.
  if (!AsyncInfo->isDone()) {
    QueryCounter.increment();
    return;
  }

  // When a thread successfully completes a target nowait region, we
  // exponentially backoff its query counter by the query factor.
  QueryCounter.decrement();

  // Delete the handle and unset it from the OpenMP task data.
  delete AsyncInfo;
  *AsyncHandle = nullptr;
}