//===----------- api.cpp - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of OpenMP API interface functions.
//
//===----------------------------------------------------------------------===//

#include "PluginManager.h"
#include "device.h"
#include "omptarget.h"
#include "rtl.h"

#include "OpenMP/InternalTypes.h"
#include "OpenMP/Mapping.h"
#include "OpenMP/OMPT/Interface.h"
#include "OpenMP/omp.h"
#include "Shared/Profile.h"

#include "llvm/ADT/SmallVector.h"

#include <climits>
#include <cstdlib>
#include <cstring>
#include <mutex>

EXTERN void ompx_dump_mapping_tables() {
  ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
  auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
  for (auto &Device : PM->devices(ExclusiveDevicesAccessor))
    dumpTargetPointerMappings(&Loc, Device, true);
}

#ifdef OMPT_SUPPORT
using namespace llvm::omp::target::ompt;
#endif

void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                          const char *Name);
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
                        const char *Name);
void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
                         const char *Name);
void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name);

// Implemented in libomp, they are called from within __tgt_* functions.
extern "C" {
int __kmpc_get_target_offload(void) __attribute__((weak));
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, int32_t gtid,
                                  int32_t flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry)
    __attribute__((weak));

kmp_task_t *
__kmpc_omp_target_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry, int64_t device_id)
    __attribute__((weak));

int32_t __kmpc_omp_task_with_deps(ident_t *loc_ref, int32_t gtid,
                                  kmp_task_t *new_task, int32_t ndeps,
                                  kmp_depend_info_t *dep_list,
                                  int32_t ndeps_noalias,
                                  kmp_depend_info_t *noalias_dep_list)
    __attribute__((weak));
}

EXTERN int omp_get_num_devices(void) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  size_t NumDevices = PM->getNumDevices();

  DP("Call to omp_get_num_devices returning %zd\n", NumDevices);

  return NumDevices;
}

EXTERN int omp_get_device_num(void) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  int HostDevice = omp_get_initial_device();

  DP("Call to omp_get_device_num returning %d\n", HostDevice);

  return HostDevice;
}

EXTERN int omp_get_initial_device(void) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  int HostDevice = omp_get_num_devices();
  DP("Call to omp_get_initial_device returning %d\n", HostDevice);
  return HostDevice;
}
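
// Usage sketch (illustrative only, not part of this translation unit): the
// host is reported as the device whose number equals the count of offload
// devices, so from host code the three queries above relate as follows.
// `NumGPUs` is a hypothetical name.
//
//   int NumGPUs = omp_get_num_devices();   // offload devices only
//   int Host = omp_get_initial_device();   // == NumGPUs
//   int Here = omp_get_device_num();       // == Host when called on the host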

EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
  TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) +
                         ";size=" + std::to_string(Size));
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
}

EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
}

EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__);
}

EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__);
}

EXTERN void omp_target_free(void *Ptr, int DeviceNum) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
}

EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
}

EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_HOST, __func__);
}

EXTERN void llvm_omp_target_free_shared(void *Ptr, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_SHARED, __func__);
}

EXTERN void *llvm_omp_target_dynamic_shared_alloc() {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return nullptr;
}

EXTERN void *llvm_omp_get_dynamic_shared() {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return nullptr;
}

EXTERN [[nodiscard]] void *llvm_omp_target_lock_mem(void *Ptr, size_t Size,
                                                    int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  return targetLockExplicit(Ptr, Size, DeviceNum, __func__);
}

EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetUnlockExplicit(Ptr, DeviceNum, __func__);
}
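
// Usage sketch (illustrative only): each allocator above routes a specific
// TARGET_ALLOC_* kind, so memory should be released through the free routine
// of the matching kind. `Dev` and `N` are hypothetical.
//
//   void *D = llvm_omp_target_alloc_device(N, Dev); // device-only memory
//   void *H = llvm_omp_target_alloc_host(N, Dev);   // host-pinned memory
//   llvm_omp_target_free_device(D, Dev);
//   llvm_omp_target_free_host(H, Dev);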

EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
     DeviceNum, DPxPTR(Ptr));

  if (!Ptr) {
    DP("Call to omp_target_is_present with NULL ptr, returning false\n");
    return false;
  }

  if (DeviceNum == omp_get_initial_device()) {
    DP("Call to omp_target_is_present on host, returning true\n");
    return true;
  }

  auto DeviceOrErr = PM->getDevice(DeviceNum);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());

  // omp_target_is_present tests whether a host pointer refers to storage that
  // is mapped to a given device. However, because the size of that storage is
  // unknown here, only check 1 byte. A size of 0 cannot be used, since that
  // would check whether the pointer itself (a zero-length array) is mapped
  // instead of the storage it refers to.
  TargetPointerResultTy TPR =
      DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
                                                   /*UpdateRefCount=*/false,
                                                   /*UseHoldRefCount=*/false);
  int Rc = TPR.isPresent();
  DP("Call to omp_target_is_present returns %d\n", Rc);
  return Rc;
}
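
// Usage sketch (illustrative only): a pointer becomes "present" once the
// storage it refers to has been mapped to the device, e.g. via a target data
// construct. `A`, `N`, and `Dev` are hypothetical.
//
//   double A[N];
//   #pragma omp target enter data map(to : A[0:N]) device(Dev)
//   assert(omp_target_is_present(A, Dev));
//   #pragma omp target exit data map(delete : A[0:N]) device(Dev)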

EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
                             size_t DstOffset, size_t SrcOffset, int DstDevice,
                             int SrcDevice) {
  TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) +
                         ";src_dev=" + std::to_string(SrcDevice) +
                         ";size=" + std::to_string(Length));
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_memcpy, dst device %d, src device %d, "
     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
     "src offset %zu, length %zu\n",
     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
     Length);

  if (!Dst || !Src || Length <= 0) {
    if (Length == 0) {
      DP("Call to omp_target_memcpy with zero length, nothing to do\n");
      return OFFLOAD_SUCCESS;
    }

    REPORT("Call to omp_target_memcpy with invalid arguments\n");
    return OFFLOAD_FAIL;
  }

  int Rc = OFFLOAD_SUCCESS;
  void *SrcAddr = (char *)const_cast<void *>(Src) + SrcOffset;
  void *DstAddr = (char *)Dst + DstOffset;

  if (SrcDevice == omp_get_initial_device() &&
      DstDevice == omp_get_initial_device()) {
    DP("copy from host to host\n");
    const void *P = memcpy(DstAddr, SrcAddr, Length);
    if (P == NULL)
      Rc = OFFLOAD_FAIL;
  } else if (SrcDevice == omp_get_initial_device()) {
    DP("copy from host to device\n");
    auto DstDeviceOrErr = PM->getDevice(DstDevice);
    if (!DstDeviceOrErr)
      FATAL_MESSAGE(DstDevice, "%s",
                    toString(DstDeviceOrErr.takeError()).c_str());
    AsyncInfoTy AsyncInfo(*DstDeviceOrErr);
    Rc = DstDeviceOrErr->submitData(DstAddr, SrcAddr, Length, AsyncInfo);
  } else if (DstDevice == omp_get_initial_device()) {
    DP("copy from device to host\n");
    auto SrcDeviceOrErr = PM->getDevice(SrcDevice);
    if (!SrcDeviceOrErr)
      FATAL_MESSAGE(SrcDevice, "%s",
                    toString(SrcDeviceOrErr.takeError()).c_str());
    AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
    Rc = SrcDeviceOrErr->retrieveData(DstAddr, SrcAddr, Length, AsyncInfo);
  } else {
    DP("copy from device to device\n");
    auto SrcDeviceOrErr = PM->getDevice(SrcDevice);
    if (!SrcDeviceOrErr)
      FATAL_MESSAGE(SrcDevice, "%s",
                    toString(SrcDeviceOrErr.takeError()).c_str());
    AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
    auto DstDeviceOrErr = PM->getDevice(DstDevice);
    if (!DstDeviceOrErr)
      FATAL_MESSAGE(DstDevice, "%s",
                    toString(DstDeviceOrErr.takeError()).c_str());
    // First try to use D2D memcpy, which is more efficient. If that fails,
    // fall back to the inefficient copy through a host buffer.
    if (SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) {
      AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
      Rc = SrcDeviceOrErr->dataExchange(SrcAddr, *DstDeviceOrErr, DstAddr,
                                        Length, AsyncInfo);
      if (Rc == OFFLOAD_SUCCESS)
        return OFFLOAD_SUCCESS;
    }

    void *Buffer = malloc(Length);
    {
      AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
      Rc = SrcDeviceOrErr->retrieveData(Buffer, SrcAddr, Length, AsyncInfo);
    }
    if (Rc == OFFLOAD_SUCCESS) {
      AsyncInfoTy AsyncInfo(*DstDeviceOrErr);
      Rc = DstDeviceOrErr->submitData(DstAddr, Buffer, Length, AsyncInfo);
    }
    free(Buffer);
  }

  DP("omp_target_memcpy returns %d\n", Rc);
  return Rc;
}

// The helper function that calls omp_target_memcpy or omp_target_memcpy_rect.
static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  if (Task == nullptr)
    return OFFLOAD_FAIL;

  TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds;

  if (Args == nullptr)
    return OFFLOAD_FAIL;

  // Call the blocking version.
  int Rc = OFFLOAD_SUCCESS;
  if (Args->IsRectMemcpy) {
    Rc = omp_target_memcpy_rect(
        Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume,
        Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions,
        Args->SrcDimensions, Args->DstDevice, Args->SrcDevice);

    DP("omp_target_memcpy_rect returns %d\n", Rc);
  } else {
    Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset,
                           Args->SrcOffset, Args->DstDevice, Args->SrcDevice);

    DP("omp_target_memcpy returns %d\n", Rc);
  }

  // Release the arguments object.
  delete Args;

  return Rc;
}

static int libomp_target_memset_async_task(int32_t Gtid, kmp_task_t *Task) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  if (!Task)
    return OFFLOAD_FAIL;

  auto *Args = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds);
  if (!Args)
    return OFFLOAD_FAIL;

  // Call the blocking omp_target_memset().
  omp_target_memset(Args->Ptr, Args->C, Args->N, Args->DeviceNum);

  delete Args;

  return OFFLOAD_SUCCESS;
}

static inline void
convertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount,
                    omp_depend_t *DepObjList) {
  for (int i = 0; i < DepObjCount; ++i) {
    omp_depend_t DepObj = DepObjList[i];
    Vec.push_back(*((kmp_depend_info_t *)DepObj));
  }
}
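
// Usage sketch (illustrative only): the *_async entry points below run the
// blocking call inside a hidden helper task. Each omp_depend_t handle they
// receive points at a kmp_depend_info_t created by an `omp depobj` construct,
// e.g. in hypothetical user code:
//
//   omp_depend_t Dep;
//   #pragma omp depobj(Dep) depend(inout : Buf)
//   omp_target_memcpy_async(Dst, Buf, N, 0, 0, DstDev,
//                           omp_get_initial_device(), 1, &Dep);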

template <class T>
static inline int
libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *),
                            int DepObjCount, omp_depend_t *DepObjList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  // Create the global thread ID.
  int Gtid = __kmpc_global_thread_num(nullptr);

  // Set up the hidden helper flags.
  int32_t Flags = 0;
  kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
  InputFlags->hidden_helper = 1;

  // Allocate the helper task.
  kmp_task_t *Task = __kmpc_omp_target_task_alloc(
      nullptr, Gtid, Flags, sizeof(kmp_task_t), 0, Fn, -1);
  if (!Task) {
    delete Args;
    return OFFLOAD_FAIL;
  }

  // Set up the arguments for the helper task.
  Task->shareds = Args;

  // Convert the types of the depend objects.
  llvm::SmallVector<kmp_depend_info_t> DepObjs;
  convertDepObjVector(DepObjs, DepObjCount, DepObjList);

  // Launch the helper task.
  int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Task, DepObjCount,
                                     DepObjs.data(), 0, nullptr);

  return Rc;
}

EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
                               int DeviceNum) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
     DeviceNum, Ptr, NumBytes);

  // Behave as a no-op if NumBytes == 0 or if Ptr is nullptr (as a useful
  // implementation of unspecified behavior, see OpenMP spec).
  if (!Ptr || NumBytes == 0) {
    return Ptr;
  }

  if (DeviceNum == omp_get_initial_device()) {
    DP("filling memory on host via memset\n");
    memset(Ptr, ByteVal, NumBytes); // ignore return value, memset() cannot fail
  } else {
    // TODO: replace the omp_target_memset() slow path with the fast path.
    // That will require the ability to execute a kernel from within
    // libomptarget.so (which we do not have at the moment).

    // This is a very slow path: create a filled array on the host and upload
    // it to the GPU device.
    int InitialDevice = omp_get_initial_device();
    void *Shadow = omp_target_alloc(NumBytes, InitialDevice);
    if (Shadow) {
      (void)memset(Shadow, ByteVal, NumBytes);
      (void)omp_target_memcpy(Ptr, Shadow, NumBytes, 0, 0, DeviceNum,
                              InitialDevice);
      (void)omp_target_free(Shadow, InitialDevice);
    } else {
      // If omp_target_alloc has failed, do nothing. omp_target_memset has no
      // good way to report failure, so we simply avoid a catastrophic failure
      // of the process for now.
      DP("omp_target_memset failed to fill memory due to error with "
         "omp_target_alloc\n");
    }
  }

  DP("omp_target_memset returns %p\n", Ptr);
  return Ptr;
}
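
// Usage sketch (illustrative only): fill a device allocation with a byte
// pattern before a kernel consumes it. `Dev` and `N` are hypothetical.
//
//   void *DevBuf = omp_target_alloc(N, Dev);
//   omp_target_memset(DevBuf, 0, N, Dev); // zero-fill on the device
//   // ... use DevBuf from a target region with is_device_ptr(DevBuf) ...
//   omp_target_free(DevBuf, Dev);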

EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes,
                                     int DeviceNum, int DepObjCount,
                                     omp_depend_t *DepObjList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_memset_async, device %d, device pointer %p, "
     "size %zu\n",
     DeviceNum, Ptr, NumBytes);

  // Behave as a no-op if NumBytes == 0 or if Ptr is nullptr (as a useful
  // implementation of unspecified behavior, see OpenMP spec).
  if (!Ptr || NumBytes == 0)
    return Ptr;

  // Create the task object to deal with the async invocation.
  auto *Args = new TargetMemsetArgsTy{Ptr, ByteVal, NumBytes, DeviceNum};

  // omp_target_memset_async() cannot fail via a return code, so ignore the
  // return code of the helper function.
  (void)libomp_helper_task_creation(Args, &libomp_target_memset_async_task,
                                    DepObjCount, DepObjList);

  return Ptr;
}

EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
                                   size_t DstOffset, size_t SrcOffset,
                                   int DstDevice, int SrcDevice,
                                   int DepObjCount, omp_depend_t *DepObjList) {
  TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) +
                         ";src_dev=" + std::to_string(SrcDevice) +
                         ";size=" + std::to_string(Length));
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
     "src offset %zu, length %zu\n",
     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
     Length);

  // Check the source and destination addresses.
  if (Dst == nullptr || Src == nullptr)
    return OFFLOAD_FAIL;

  // Create the task argument object.
  TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
      Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice);

  // Create and launch the helper task.
  int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task,
                                       DepObjCount, DepObjList);

  DP("omp_target_memcpy_async returns %d\n", Rc);
  return Rc;
}
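
// Usage sketch (illustrative only): because the asynchronous copy runs as an
// OpenMP task, completion can be observed with task synchronization.
// `HostBuf`, `DevBuf`, `N`, and `Dev` are hypothetical.
//
//   omp_target_memcpy_async(DevBuf, HostBuf, N, 0, 0, Dev,
//                           omp_get_initial_device(), 0, nullptr);
//   #pragma omp taskwait // wait for the helper task to finish the copy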

EXTERN int
omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                       int NumDims, const size_t *Volume,
                       const size_t *DstOffsets, const size_t *SrcOffsets,
                       const size_t *DstDimensions,
                       const size_t *SrcDimensions, int DstDevice,
                       int SrcDevice) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
     "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
     "volume " DPxMOD ", element size %zu, num_dims %d\n",
     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
     DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
     DPxPTR(Volume), ElementSize, NumDims);

  if (!(Dst || Src)) {
    DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
       INT_MAX);
    return INT_MAX;
  }

  if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume ||
      !DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) {
    REPORT("Call to omp_target_memcpy_rect with invalid arguments\n");
    return OFFLOAD_FAIL;
  }

  int Rc = OFFLOAD_SUCCESS; // A zero-sized volume trivially succeeds.
  if (NumDims == 1) {
    Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0],
                           ElementSize * DstOffsets[0],
                           ElementSize * SrcOffsets[0], DstDevice, SrcDevice);
  } else {
    size_t DstSliceSize = ElementSize;
    size_t SrcSliceSize = ElementSize;
    for (int I = 1; I < NumDims; ++I) {
      DstSliceSize *= DstDimensions[I];
      SrcSliceSize *= SrcDimensions[I];
    }

    size_t DstOff = DstOffsets[0] * DstSliceSize;
    size_t SrcOff = SrcOffsets[0] * SrcSliceSize;
    for (size_t I = 0; I < Volume[0]; ++I) {
      Rc = omp_target_memcpy_rect(
          (char *)Dst + DstOff + DstSliceSize * I,
          (char *)const_cast<void *>(Src) + SrcOff + SrcSliceSize * I,
          ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1,
          DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice);

      if (Rc) {
        DP("Recursive call to omp_target_memcpy_rect returns "
           "unsuccessfully\n");
        return Rc;
      }
    }
  }

  DP("omp_target_memcpy_rect returns %d\n", Rc);
  return Rc;
}

EXTERN int omp_target_memcpy_rect_async(
    void *Dst, const void *Src, size_t ElementSize, int NumDims,
    const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets,
    const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice,
    int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) {
  TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) +
                         ";src_dev=" + std::to_string(SrcDevice) +
                         ";size=" + std::to_string(ElementSize) +
                         ";num_dims=" + std::to_string(NumDims));
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
     "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
     "volume " DPxMOD ", element size %zu, num_dims %d\n",
     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
     DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
     DPxPTR(Volume), ElementSize, NumDims);

  // Check this first so that a query with two NULL pointers returns the
  // maximum supported number of dimensions rather than OFFLOAD_FAIL.
  if (!Dst && !Src) {
    DP("Call to omp_target_memcpy_rect_async returns max supported "
       "dimensions %d\n",
       INT_MAX);
    return INT_MAX;
  }

  // Check the source and destination addresses.
  if (Dst == nullptr || Src == nullptr)
    return OFFLOAD_FAIL;

  // Create the task argument object.
  TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
      Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets,
      DstDimensions, SrcDimensions, DstDevice, SrcDevice);

  // Create and launch the helper task.
  int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task,
                                       DepObjCount, DepObjList);

  DP("omp_target_memcpy_rect_async returns %d\n", Rc);
  return Rc;
}
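
// Worked example (illustrative only): copying a 2 x 3 element sub-block out
// of a 4 x 8 source array into a 5 x 6 destination array reduces, via the
// recursion above, to two 1-D copies of 3 elements each. `Dst`, `Src`,
// `DstDev`, and `SrcDev` are hypothetical.
//
//   size_t Volume[2] = {2, 3};
//   size_t SrcOff[2] = {1, 2}; // start at row 1, column 2 of the source
//   size_t DstOff[2] = {0, 0};
//   size_t SrcDims[2] = {4, 8};
//   size_t DstDims[2] = {5, 6};
//   omp_target_memcpy_rect(Dst, Src, sizeof(double), 2, Volume, DstOff,
//                          SrcOff, DstDims, SrcDims, DstDev, SrcDev);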

EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                                    size_t Size, size_t DeviceOffset,
                                    int DeviceNum) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", "
     "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n",
     DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum);

  if (!HostPtr || !DevicePtr || Size <= 0) {
    REPORT("Call to omp_target_associate_ptr with invalid arguments\n");
    return OFFLOAD_FAIL;
  }

  if (DeviceNum == omp_get_initial_device()) {
    REPORT("omp_target_associate_ptr: no association possible on the host\n");
    return OFFLOAD_FAIL;
  }

  auto DeviceOrErr = PM->getDevice(DeviceNum);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());

  void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset);

  OMPT_IF_BUILT(InterfaceRAII(
      RegionInterface.getCallbacks<ompt_target_data_associate>(), DeviceNum,
      const_cast<void *>(HostPtr), const_cast<void *>(DevicePtr), Size,
      __builtin_return_address(0)));

  int Rc = DeviceOrErr->getMappingInfo().associatePtr(
      const_cast<void *>(HostPtr), const_cast<void *>(DeviceAddr), Size);
  DP("omp_target_associate_ptr returns %d\n", Rc);
  return Rc;
}

EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
     "device_num %d\n",
     DPxPTR(HostPtr), DeviceNum);

  if (!HostPtr) {
    REPORT("Call to omp_target_disassociate_ptr with invalid host_ptr\n");
    return OFFLOAD_FAIL;
  }

  if (DeviceNum == omp_get_initial_device()) {
    REPORT(
        "omp_target_disassociate_ptr: no association possible on the host\n");
    return OFFLOAD_FAIL;
  }

  auto DeviceOrErr = PM->getDevice(DeviceNum);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());

  OMPT_IF_BUILT(InterfaceRAII(
      RegionInterface.getCallbacks<ompt_target_data_disassociate>(), DeviceNum,
      const_cast<void *>(HostPtr),
      /*DevicePtr=*/nullptr, /*Size=*/0, __builtin_return_address(0)));

  int Rc = DeviceOrErr->getMappingInfo().disassociatePtr(
      const_cast<void *>(HostPtr));
  DP("omp_target_disassociate_ptr returns %d\n", Rc);
  return Rc;
}

EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
  TIMESCOPE();
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n",
     DPxPTR(Ptr), DeviceNum);

  if (!Ptr) {
    REPORT("Call to omp_get_mapped_ptr with nullptr.\n");
    return nullptr;
  }

  int NumDevices = omp_get_initial_device();
  if (DeviceNum == NumDevices) {
    DP("Device %d is the initial device, returning Ptr " DPxMOD ".\n",
       DeviceNum, DPxPTR(Ptr));
    return const_cast<void *>(Ptr);
  }

  if (NumDevices <= DeviceNum) {
    DP("DeviceNum %d is invalid, returning nullptr.\n", DeviceNum);
    return nullptr;
  }

  auto DeviceOrErr = PM->getDevice(DeviceNum);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());

  TargetPointerResultTy TPR =
      DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
                                                   /*UpdateRefCount=*/false,
                                                   /*UseHoldRefCount=*/false);
  if (!TPR.isPresent()) {
    DP("Ptr " DPxMOD " is not present on device %d, returning nullptr.\n",
       DPxPTR(Ptr), DeviceNum);
    return nullptr;
  }

  DP("omp_get_mapped_ptr returns " DPxMOD ".\n", DPxPTR(TPR.TargetPointer));

  return TPR.TargetPointer;
}
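
// Usage sketch (illustrative only): a device buffer obtained outside the
// mapping machinery can be associated with host storage so that subsequent
// lookups find it. `A`, `N`, and `Dev` are hypothetical.
//
//   double A[N];
//   void *DevA = omp_target_alloc(N * sizeof(double), Dev);
//   omp_target_associate_ptr(A, DevA, N * sizeof(double), 0, Dev);
//   assert(omp_get_mapped_ptr(A, Dev) == DevA);
//   omp_target_disassociate_ptr(A, Dev);
//   omp_target_free(DevA, Dev);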