//===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// RTL NextGen for generic 64-bit machine
//
//===----------------------------------------------------------------------===//

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ffi.h>
#include <string>
#include <unistd.h>
#include <unordered_map>

#include "Shared/Debug.h"
#include "Shared/Environment.h"
#include "Utils/ELF.h"

#include "GlobalHandler.h"
#include "OpenMP/OMPT/Callback.h"
#include "PluginInterface.h"
#include "omptarget.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/Support/DynamicLibrary.h"

#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) ||           \
    !defined(__ORDER_BIG_ENDIAN__)
#error "Missing preprocessor definitions for endianness detection."
#endif

#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define LITTLEENDIAN_CPU
#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define BIGENDIAN_CPU
#endif

// The number of devices in this plugin.
#define NUM_DEVICES 4

namespace llvm {
namespace omp {
namespace target {
namespace plugin {

/// Forward declarations for all specialized data structures.
struct GenELF64KernelTy;
struct GenELF64DeviceTy;
struct GenELF64PluginTy;

using llvm::sys::DynamicLibrary;

/// Class implementing kernel functionalities for GenELF64.
61 struct GenELF64KernelTy : public GenericKernelTy { 62 /// Construct the kernel with a name and an execution mode. 63 GenELF64KernelTy(const char *Name) : GenericKernelTy(Name), Func(nullptr) {} 64 65 /// Initialize the kernel. 66 Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override { 67 // Functions have zero size. 68 GlobalTy Global(getName(), 0); 69 70 // Get the metadata (address) of the kernel function. 71 GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler(); 72 if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global)) 73 return Err; 74 75 // Check that the function pointer is valid. 76 if (!Global.getPtr()) 77 return Plugin::error("Invalid function for kernel %s", getName()); 78 79 // Save the function pointer. 80 Func = (void (*)())Global.getPtr(); 81 82 KernelEnvironment.Configuration.ExecMode = OMP_TGT_EXEC_MODE_GENERIC; 83 KernelEnvironment.Configuration.MayUseNestedParallelism = /*Unknown=*/2; 84 KernelEnvironment.Configuration.UseGenericStateMachine = /*Unknown=*/2; 85 86 // Set the maximum number of threads to a single. 87 MaxNumThreads = 1; 88 return Plugin::success(); 89 } 90 91 /// Launch the kernel using the libffi. 92 Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads[3], 93 uint32_t NumBlocks[3], KernelArgsTy &KernelArgs, 94 KernelLaunchParamsTy LaunchParams, 95 AsyncInfoWrapperTy &AsyncInfoWrapper) const override { 96 // Create a vector of ffi_types, one per argument. 97 SmallVector<ffi_type *, 16> ArgTypes(KernelArgs.NumArgs, &ffi_type_pointer); 98 ffi_type **ArgTypesPtr = (ArgTypes.size()) ? &ArgTypes[0] : nullptr; 99 100 // Prepare the cif structure before running the kernel function. 101 ffi_cif Cif; 102 ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, KernelArgs.NumArgs, 103 &ffi_type_void, ArgTypesPtr); 104 if (Status != FFI_OK) 105 return Plugin::error("Error in ffi_prep_cif: %d", Status); 106 107 // Call the kernel function through libffi. 
108 long Return; 109 ffi_call(&Cif, Func, &Return, (void **)LaunchParams.Ptrs); 110 111 return Plugin::success(); 112 } 113 114 private: 115 /// The kernel function to execute. 116 void (*Func)(void); 117 }; 118 119 /// Class implementing the GenELF64 device images properties. 120 struct GenELF64DeviceImageTy : public DeviceImageTy { 121 /// Create the GenELF64 image with the id and the target image pointer. 122 GenELF64DeviceImageTy(int32_t ImageId, GenericDeviceTy &Device, 123 const __tgt_device_image *TgtImage) 124 : DeviceImageTy(ImageId, Device, TgtImage), DynLib() {} 125 126 /// Getter and setter for the dynamic library. 127 DynamicLibrary &getDynamicLibrary() { return DynLib; } 128 void setDynamicLibrary(const DynamicLibrary &Lib) { DynLib = Lib; } 129 130 private: 131 /// The dynamic library that loaded the image. 132 DynamicLibrary DynLib; 133 }; 134 135 /// Class implementing the device functionalities for GenELF64. 136 struct GenELF64DeviceTy : public GenericDeviceTy { 137 /// Create the device with a specific id. 138 GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, 139 int32_t NumDevices) 140 : GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {} 141 142 ~GenELF64DeviceTy() {} 143 144 /// Initialize the device, which is a no-op 145 Error initImpl(GenericPluginTy &Plugin) override { return Plugin::success(); } 146 147 /// Deinitialize the device, which is a no-op 148 Error deinitImpl() override { return Plugin::success(); } 149 150 /// See GenericDeviceTy::getComputeUnitKind(). 151 std::string getComputeUnitKind() const override { return "generic-64bit"; } 152 153 /// Construct the kernel for a specific image on the device. 154 Expected<GenericKernelTy &> constructKernel(const char *Name) override { 155 // Allocate and construct the kernel. 
156 GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>(); 157 if (!GenELF64Kernel) 158 return Plugin::error("Failed to allocate memory for GenELF64 kernel"); 159 160 new (GenELF64Kernel) GenELF64KernelTy(Name); 161 162 return *GenELF64Kernel; 163 } 164 165 /// Set the current context to this device, which is a no-op. 166 Error setContext() override { return Plugin::success(); } 167 168 /// Load the binary image into the device and allocate an image object. 169 Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage, 170 int32_t ImageId) override { 171 // Allocate and initialize the image object. 172 GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>(); 173 new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage); 174 175 // Create a temporary file. 176 char TmpFileName[] = "/tmp/tmpfile_XXXXXX"; 177 int TmpFileFd = mkstemp(TmpFileName); 178 if (TmpFileFd == -1) 179 return Plugin::error("Failed to create tmpfile for loading target image"); 180 181 // Open the temporary file. 182 FILE *TmpFile = fdopen(TmpFileFd, "wb"); 183 if (!TmpFile) 184 return Plugin::error("Failed to open tmpfile %s for loading target image", 185 TmpFileName); 186 187 // Write the image into the temporary file. 188 size_t Written = fwrite(Image->getStart(), Image->getSize(), 1, TmpFile); 189 if (Written != 1) 190 return Plugin::error("Failed to write target image to tmpfile %s", 191 TmpFileName); 192 193 // Close the temporary file. 194 int Ret = fclose(TmpFile); 195 if (Ret) 196 return Plugin::error("Failed to close tmpfile %s with the target image", 197 TmpFileName); 198 199 // Load the temporary file as a dynamic library. 200 std::string ErrMsg; 201 DynamicLibrary DynLib = 202 DynamicLibrary::getPermanentLibrary(TmpFileName, &ErrMsg); 203 204 // Check if the loaded library is valid. 
205 if (!DynLib.isValid()) 206 return Plugin::error("Failed to load target image: %s", ErrMsg.c_str()); 207 208 // Save a reference of the image's dynamic library. 209 Image->setDynamicLibrary(DynLib); 210 211 return Image; 212 } 213 214 /// Allocate memory. Use std::malloc in all cases. 215 void *allocate(size_t Size, void *, TargetAllocTy Kind) override { 216 if (Size == 0) 217 return nullptr; 218 219 void *MemAlloc = nullptr; 220 switch (Kind) { 221 case TARGET_ALLOC_DEFAULT: 222 case TARGET_ALLOC_DEVICE: 223 case TARGET_ALLOC_HOST: 224 case TARGET_ALLOC_SHARED: 225 case TARGET_ALLOC_DEVICE_NON_BLOCKING: 226 MemAlloc = std::malloc(Size); 227 break; 228 } 229 return MemAlloc; 230 } 231 232 /// Free the memory. Use std::free in all cases. 233 int free(void *TgtPtr, TargetAllocTy Kind) override { 234 std::free(TgtPtr); 235 return OFFLOAD_SUCCESS; 236 } 237 238 /// This plugin does nothing to lock buffers. Do not return an error, just 239 /// return the same pointer as the device pointer. 240 Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) override { 241 return HstPtr; 242 } 243 244 /// Nothing to do when unlocking the buffer. 245 Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); } 246 247 /// Indicate that the buffer is not pinned. 248 Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, 249 void *&BaseDevAccessiblePtr, 250 size_t &BaseSize) const override { 251 return false; 252 } 253 254 /// Submit data to the device (host to device transfer). 255 Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, 256 AsyncInfoWrapperTy &AsyncInfoWrapper) override { 257 std::memcpy(TgtPtr, HstPtr, Size); 258 return Plugin::success(); 259 } 260 261 /// Retrieve data from the device (device to host transfer). 
262 Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, 263 AsyncInfoWrapperTy &AsyncInfoWrapper) override { 264 std::memcpy(HstPtr, TgtPtr, Size); 265 return Plugin::success(); 266 } 267 268 /// Exchange data between two devices within the plugin. This function is not 269 /// supported in this plugin. 270 Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstGenericDevice, 271 void *DstPtr, int64_t Size, 272 AsyncInfoWrapperTy &AsyncInfoWrapper) override { 273 // This function should never be called because the function 274 // GenELF64PluginTy::isDataExchangable() returns false. 275 return Plugin::error("dataExchangeImpl not supported"); 276 } 277 278 /// All functions are already synchronous. No need to do anything on this 279 /// synchronization function. 280 Error synchronizeImpl(__tgt_async_info &AsyncInfo) override { 281 return Plugin::success(); 282 } 283 284 /// All functions are already synchronous. No need to do anything on this 285 /// query function. 286 Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override { 287 return Plugin::success(); 288 } 289 290 /// This plugin does not support interoperability 291 Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override { 292 return Plugin::error("initAsyncInfoImpl not supported"); 293 } 294 295 /// This plugin does not support interoperability 296 Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override { 297 return Plugin::error("initDeviceInfoImpl not supported"); 298 } 299 300 /// This plugin does not support the event API. Do nothing without failing. 
301 Error createEventImpl(void **EventPtrStorage) override { 302 *EventPtrStorage = nullptr; 303 return Plugin::success(); 304 } 305 Error destroyEventImpl(void *EventPtr) override { return Plugin::success(); } 306 Error recordEventImpl(void *EventPtr, 307 AsyncInfoWrapperTy &AsyncInfoWrapper) override { 308 return Plugin::success(); 309 } 310 Error waitEventImpl(void *EventPtr, 311 AsyncInfoWrapperTy &AsyncInfoWrapper) override { 312 return Plugin::success(); 313 } 314 Error syncEventImpl(void *EventPtr) override { return Plugin::success(); } 315 316 /// Print information about the device. 317 Error obtainInfoImpl(InfoQueueTy &Info) override { 318 Info.add("Device Type", "Generic-elf-64bit"); 319 return Plugin::success(); 320 } 321 322 /// This plugin should not setup the device environment or memory pool. 323 virtual bool shouldSetupDeviceEnvironment() const override { return false; }; 324 virtual bool shouldSetupDeviceMemoryPool() const override { return false; }; 325 326 /// Getters and setters for stack size and heap size not relevant. 327 Error getDeviceStackSize(uint64_t &Value) override { 328 Value = 0; 329 return Plugin::success(); 330 } 331 Error setDeviceStackSize(uint64_t Value) override { 332 return Plugin::success(); 333 } 334 Error getDeviceHeapSize(uint64_t &Value) override { 335 Value = 0; 336 return Plugin::success(); 337 } 338 Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); } 339 340 private: 341 /// Grid values for Generic ELF64 plugins. 
342 static constexpr GV GenELF64GridValues = { 343 1, // GV_Slot_Size 344 1, // GV_Warp_Size 345 1, // GV_Max_Teams 346 1, // GV_Default_Num_Teams 347 1, // GV_SimpleBufferSize 348 1, // GV_Max_WG_Size 349 1, // GV_Default_WG_Size 350 }; 351 }; 352 353 class GenELF64GlobalHandlerTy final : public GenericGlobalHandlerTy { 354 public: 355 Error getGlobalMetadataFromDevice(GenericDeviceTy &GenericDevice, 356 DeviceImageTy &Image, 357 GlobalTy &DeviceGlobal) override { 358 const char *GlobalName = DeviceGlobal.getName().data(); 359 GenELF64DeviceImageTy &GenELF64Image = 360 static_cast<GenELF64DeviceImageTy &>(Image); 361 362 // Get dynamic library that has loaded the device image. 363 DynamicLibrary &DynLib = GenELF64Image.getDynamicLibrary(); 364 365 // Get the address of the symbol. 366 void *Addr = DynLib.getAddressOfSymbol(GlobalName); 367 if (Addr == nullptr) { 368 return Plugin::error("Failed to load global '%s'", GlobalName); 369 } 370 371 // Save the pointer to the symbol. 372 DeviceGlobal.setPtr(Addr); 373 374 return Plugin::success(); 375 } 376 }; 377 378 /// Class implementing the plugin functionalities for GenELF64. 379 struct GenELF64PluginTy final : public GenericPluginTy { 380 /// Create the GenELF64 plugin. 381 GenELF64PluginTy() : GenericPluginTy(getTripleArch()) {} 382 383 /// This class should not be copied. 384 GenELF64PluginTy(const GenELF64PluginTy &) = delete; 385 GenELF64PluginTy(GenELF64PluginTy &&) = delete; 386 387 /// Initialize the plugin and return the number of devices. 388 Expected<int32_t> initImpl() override { 389 #ifdef USES_DYNAMIC_FFI 390 if (auto Err = Plugin::check(ffi_init(), "Failed to initialize libffi")) 391 return std::move(Err); 392 #endif 393 394 return NUM_DEVICES; 395 } 396 397 /// Deinitialize the plugin. 398 Error deinitImpl() override { return Plugin::success(); } 399 400 /// Creates a generic ELF device. 
401 GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId, 402 int32_t NumDevices) override { 403 return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices); 404 } 405 406 /// Creates a generic global handler. 407 GenericGlobalHandlerTy *createGlobalHandler() override { 408 return new GenELF64GlobalHandlerTy(); 409 } 410 411 /// Get the ELF code to recognize the compatible binary images. 412 uint16_t getMagicElfBits() const override { 413 return utils::elf::getTargetMachine(); 414 } 415 416 /// This plugin does not support exchanging data between two devices. 417 bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { 418 return false; 419 } 420 421 /// All images (ELF-compatible) should be compatible with this plugin. 422 Expected<bool> isELFCompatible(uint32_t, StringRef) const override { 423 return true; 424 } 425 426 Triple::ArchType getTripleArch() const override { 427 #if defined(__x86_64__) 428 return llvm::Triple::x86_64; 429 #elif defined(__s390x__) 430 return llvm::Triple::systemz; 431 #elif defined(__aarch64__) 432 #ifdef LITTLEENDIAN_CPU 433 return llvm::Triple::aarch64; 434 #else 435 return llvm::Triple::aarch64_be; 436 #endif 437 #elif defined(__powerpc64__) 438 #ifdef LITTLEENDIAN_CPU 439 return llvm::Triple::ppc64le; 440 #else 441 return llvm::Triple::ppc64; 442 #endif 443 #elif defined(__riscv) && (__riscv_xlen == 64) 444 return llvm::Triple::riscv64; 445 #elif defined(__loongarch__) && (__loongarch_grlen == 64) 446 return llvm::Triple::loongarch64; 447 #else 448 return llvm::Triple::UnknownArch; 449 #endif 450 } 451 452 const char *getName() const override { return GETNAME(TARGET_NAME); } 453 }; 454 455 template <typename... ArgsTy> 456 static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... 
Args) { 457 if (Code == 0) 458 return Error::success(); 459 460 return createStringError<ArgsTy..., const char *>( 461 inconvertibleErrorCode(), ErrMsg, Args..., std::to_string(Code).data()); 462 } 463 464 } // namespace plugin 465 } // namespace target 466 } // namespace omp 467 } // namespace llvm 468 469 extern "C" { 470 llvm::omp::target::plugin::GenericPluginTy *createPlugin_host() { 471 return new llvm::omp::target::plugin::GenELF64PluginTy(); 472 } 473 } 474