19fa9d9a7SFabian Mora //===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===// 29fa9d9a7SFabian Mora // 39fa9d9a7SFabian Mora // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49fa9d9a7SFabian Mora // See https://llvm.org/LICENSE.txt for license information. 59fa9d9a7SFabian Mora // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69fa9d9a7SFabian Mora // 79fa9d9a7SFabian Mora //===----------------------------------------------------------------------===// 89fa9d9a7SFabian Mora 99fa9d9a7SFabian Mora #include "llvm/Frontend/Offloading/OffloadWrapper.h" 109fa9d9a7SFabian Mora #include "llvm/ADT/ArrayRef.h" 119fa9d9a7SFabian Mora #include "llvm/BinaryFormat/Magic.h" 129fa9d9a7SFabian Mora #include "llvm/Frontend/Offloading/Utility.h" 139fa9d9a7SFabian Mora #include "llvm/IR/Constants.h" 149fa9d9a7SFabian Mora #include "llvm/IR/GlobalVariable.h" 159fa9d9a7SFabian Mora #include "llvm/IR/IRBuilder.h" 169fa9d9a7SFabian Mora #include "llvm/IR/LLVMContext.h" 179fa9d9a7SFabian Mora #include "llvm/IR/Module.h" 189fa9d9a7SFabian Mora #include "llvm/Object/OffloadBinary.h" 199fa9d9a7SFabian Mora #include "llvm/Support/Error.h" 209fa9d9a7SFabian Mora #include "llvm/TargetParser/Triple.h" 219fa9d9a7SFabian Mora #include "llvm/Transforms/Utils/ModuleUtils.h" 229fa9d9a7SFabian Mora 239fa9d9a7SFabian Mora using namespace llvm; 249fa9d9a7SFabian Mora using namespace llvm::offloading; 259fa9d9a7SFabian Mora 269fa9d9a7SFabian Mora namespace { 279fa9d9a7SFabian Mora /// Magic number that begins the section containing the CUDA fatbinary. 289fa9d9a7SFabian Mora constexpr unsigned CudaFatMagic = 0x466243b1; 299fa9d9a7SFabian Mora constexpr unsigned HIPFatMagic = 0x48495046; 309fa9d9a7SFabian Mora 319fa9d9a7SFabian Mora IntegerType *getSizeTTy(Module &M) { 329fa9d9a7SFabian Mora return M.getDataLayout().getIntPtrType(M.getContext()); 339fa9d9a7SFabian Mora } 349fa9d9a7SFabian Mora 359fa9d9a7SFabian Mora // struct __tgt_device_image { 369fa9d9a7SFabian Mora // void *ImageStart; 379fa9d9a7SFabian Mora // void *ImageEnd; 389fa9d9a7SFabian Mora // __tgt_offload_entry *EntriesBegin; 399fa9d9a7SFabian Mora // __tgt_offload_entry *EntriesEnd; 409fa9d9a7SFabian Mora // }; 419fa9d9a7SFabian Mora StructType *getDeviceImageTy(Module &M) { 429fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 439fa9d9a7SFabian Mora StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image"); 449fa9d9a7SFabian Mora if (!ImageTy) 459fa9d9a7SFabian Mora ImageTy = 469fa9d9a7SFabian Mora StructType::create("__tgt_device_image", PointerType::getUnqual(C), 479fa9d9a7SFabian Mora PointerType::getUnqual(C), PointerType::getUnqual(C), 489fa9d9a7SFabian Mora PointerType::getUnqual(C)); 499fa9d9a7SFabian Mora return ImageTy; 509fa9d9a7SFabian Mora } 519fa9d9a7SFabian Mora 529fa9d9a7SFabian Mora PointerType *getDeviceImagePtrTy(Module &M) { 53d7c14c8fSMats Jun Larsen return PointerType::getUnqual(M.getContext()); 549fa9d9a7SFabian Mora } 559fa9d9a7SFabian Mora 569fa9d9a7SFabian Mora // struct __tgt_bin_desc { 579fa9d9a7SFabian Mora // int32_t NumDeviceImages; 589fa9d9a7SFabian Mora // __tgt_device_image *DeviceImages; 599fa9d9a7SFabian Mora // __tgt_offload_entry *HostEntriesBegin; 609fa9d9a7SFabian Mora // __tgt_offload_entry *HostEntriesEnd; 619fa9d9a7SFabian Mora // }; 629fa9d9a7SFabian Mora StructType *getBinDescTy(Module &M) { 639fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 649fa9d9a7SFabian Mora StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc"); 659fa9d9a7SFabian Mora if (!DescTy) 669fa9d9a7SFabian Mora DescTy = StructType::create( 679fa9d9a7SFabian Mora "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M), 689fa9d9a7SFabian Mora PointerType::getUnqual(C), PointerType::getUnqual(C)); 699fa9d9a7SFabian Mora return DescTy; 709fa9d9a7SFabian Mora } 719fa9d9a7SFabian Mora 729fa9d9a7SFabian Mora PointerType *getBinDescPtrTy(Module &M) { 73d7c14c8fSMats Jun Larsen return PointerType::getUnqual(M.getContext()); 749fa9d9a7SFabian Mora } 759fa9d9a7SFabian Mora 769fa9d9a7SFabian Mora /// Creates binary descriptor for the given device images. Binary descriptor 779fa9d9a7SFabian Mora /// is an object that is passed to the offloading runtime at program startup 789fa9d9a7SFabian Mora /// and it describes all device images available in the executable or shared 799fa9d9a7SFabian Mora /// library. It is defined as follows 809fa9d9a7SFabian Mora /// 819fa9d9a7SFabian Mora /// __attribute__((visibility("hidden"))) 829fa9d9a7SFabian Mora /// extern __tgt_offload_entry *__start_omp_offloading_entries; 839fa9d9a7SFabian Mora /// __attribute__((visibility("hidden"))) 849fa9d9a7SFabian Mora /// extern __tgt_offload_entry *__stop_omp_offloading_entries; 859fa9d9a7SFabian Mora /// 869fa9d9a7SFabian Mora /// static const char Image0[] = { <Bufs.front() contents> }; 879fa9d9a7SFabian Mora /// ... 889fa9d9a7SFabian Mora /// static const char ImageN[] = { <Bufs.back() contents> }; 899fa9d9a7SFabian Mora /// 909fa9d9a7SFabian Mora /// static const __tgt_device_image Images[] = { 919fa9d9a7SFabian Mora /// { 929fa9d9a7SFabian Mora /// Image0, /*ImageStart*/ 939fa9d9a7SFabian Mora /// Image0 + sizeof(Image0), /*ImageEnd*/ 949fa9d9a7SFabian Mora /// __start_omp_offloading_entries, /*EntriesBegin*/ 959fa9d9a7SFabian Mora /// __stop_omp_offloading_entries /*EntriesEnd*/ 969fa9d9a7SFabian Mora /// }, 979fa9d9a7SFabian Mora /// ... 989fa9d9a7SFabian Mora /// { 999fa9d9a7SFabian Mora /// ImageN, /*ImageStart*/ 1009fa9d9a7SFabian Mora /// ImageN + sizeof(ImageN), /*ImageEnd*/ 1019fa9d9a7SFabian Mora /// __start_omp_offloading_entries, /*EntriesBegin*/ 1029fa9d9a7SFabian Mora /// __stop_omp_offloading_entries /*EntriesEnd*/ 1039fa9d9a7SFabian Mora /// } 1049fa9d9a7SFabian Mora /// }; 1059fa9d9a7SFabian Mora /// 1069fa9d9a7SFabian Mora /// static const __tgt_bin_desc BinDesc = { 1079fa9d9a7SFabian Mora /// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/ 1089fa9d9a7SFabian Mora /// Images, /*DeviceImages*/ 1099fa9d9a7SFabian Mora /// __start_omp_offloading_entries, /*HostEntriesBegin*/ 1109fa9d9a7SFabian Mora /// __stop_omp_offloading_entries /*HostEntriesEnd*/ 1119fa9d9a7SFabian Mora /// }; 1129fa9d9a7SFabian Mora /// 1139fa9d9a7SFabian Mora /// Global variable that represents BinDesc is returned. 1149fa9d9a7SFabian Mora GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs, 1155c840542SJoseph Huber EntryArrayTy EntryArray, StringRef Suffix, 1165c840542SJoseph Huber bool Relocatable) { 1179fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 1189fa9d9a7SFabian Mora auto [EntriesB, EntriesE] = EntryArray; 1199fa9d9a7SFabian Mora 1209fa9d9a7SFabian Mora auto *Zero = ConstantInt::get(getSizeTTy(M), 0u); 1219fa9d9a7SFabian Mora Constant *ZeroZero[] = {Zero, Zero}; 1229fa9d9a7SFabian Mora 1239fa9d9a7SFabian Mora // Create initializer for the images array. 1249fa9d9a7SFabian Mora SmallVector<Constant *, 4u> ImagesInits; 1259fa9d9a7SFabian Mora ImagesInits.reserve(Bufs.size()); 1269fa9d9a7SFabian Mora for (ArrayRef<char> Buf : Bufs) { 1279fa9d9a7SFabian Mora // We embed the full offloading entry so the binary utilities can parse it. 1289fa9d9a7SFabian Mora auto *Data = ConstantDataArray::get(C, Buf); 1299fa9d9a7SFabian Mora auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true, 1309fa9d9a7SFabian Mora GlobalVariable::InternalLinkage, Data, 1319fa9d9a7SFabian Mora ".omp_offloading.device_image" + Suffix); 1329fa9d9a7SFabian Mora Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1335c840542SJoseph Huber Image->setSection(Relocatable ? ".llvm.offloading.relocatable" 1345c840542SJoseph Huber : ".llvm.offloading"); 1359fa9d9a7SFabian Mora Image->setAlignment(Align(object::OffloadBinary::getAlignment())); 1369fa9d9a7SFabian Mora 1379fa9d9a7SFabian Mora StringRef Binary(Buf.data(), Buf.size()); 1389fa9d9a7SFabian Mora assert(identify_magic(Binary) == file_magic::offload_binary && 1399fa9d9a7SFabian Mora "Invalid binary format"); 1409fa9d9a7SFabian Mora 1419fa9d9a7SFabian Mora // The device image struct contains the pointer to the beginning and end of 1429fa9d9a7SFabian Mora // the image stored inside of the offload binary. There should only be one 1439fa9d9a7SFabian Mora // of these for each buffer so we parse it out manually. 1449fa9d9a7SFabian Mora const auto *Header = 1459fa9d9a7SFabian Mora reinterpret_cast<const object::OffloadBinary::Header *>( 1469fa9d9a7SFabian Mora Binary.bytes_begin()); 1479fa9d9a7SFabian Mora const auto *Entry = reinterpret_cast<const object::OffloadBinary::Entry *>( 1489fa9d9a7SFabian Mora Binary.bytes_begin() + Header->EntryOffset); 1499fa9d9a7SFabian Mora 1509fa9d9a7SFabian Mora auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset); 1519fa9d9a7SFabian Mora auto *Size = 1529fa9d9a7SFabian Mora ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize); 1539fa9d9a7SFabian Mora Constant *ZeroBegin[] = {Zero, Begin}; 1549fa9d9a7SFabian Mora Constant *ZeroSize[] = {Zero, Size}; 1559fa9d9a7SFabian Mora 1569fa9d9a7SFabian Mora auto *ImageB = 1579fa9d9a7SFabian Mora ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin); 1589fa9d9a7SFabian Mora auto *ImageE = 1599fa9d9a7SFabian Mora ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize); 1609fa9d9a7SFabian Mora 1619fa9d9a7SFabian Mora ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB, 1629fa9d9a7SFabian Mora ImageE, EntriesB, EntriesE)); 1639fa9d9a7SFabian Mora } 1649fa9d9a7SFabian Mora 1659fa9d9a7SFabian Mora // Then create images array. 1669fa9d9a7SFabian Mora auto *ImagesData = ConstantArray::get( 1679fa9d9a7SFabian Mora ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits); 1689fa9d9a7SFabian Mora 1699fa9d9a7SFabian Mora auto *Images = 1709fa9d9a7SFabian Mora new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true, 1719fa9d9a7SFabian Mora GlobalValue::InternalLinkage, ImagesData, 1729fa9d9a7SFabian Mora ".omp_offloading.device_images" + Suffix); 1739fa9d9a7SFabian Mora Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1749fa9d9a7SFabian Mora 1759fa9d9a7SFabian Mora auto *ImagesB = 1769fa9d9a7SFabian Mora ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero); 1779fa9d9a7SFabian Mora 1789fa9d9a7SFabian Mora // And finally create the binary descriptor object. 1799fa9d9a7SFabian Mora auto *DescInit = ConstantStruct::get( 1809fa9d9a7SFabian Mora getBinDescTy(M), 1819fa9d9a7SFabian Mora ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB, 1829fa9d9a7SFabian Mora EntriesB, EntriesE); 1839fa9d9a7SFabian Mora 184*13dcc95dSJoseph Huber return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true, 1859fa9d9a7SFabian Mora GlobalValue::InternalLinkage, DescInit, 1869fa9d9a7SFabian Mora ".omp_offloading.descriptor" + Suffix); 1879fa9d9a7SFabian Mora } 1889fa9d9a7SFabian Mora 189421085fdSJoseph Huber Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc, 1909fa9d9a7SFabian Mora StringRef Suffix) { 1919fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 1929fa9d9a7SFabian Mora auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 1939fa9d9a7SFabian Mora auto *Func = 1949fa9d9a7SFabian Mora Function::Create(FuncTy, GlobalValue::InternalLinkage, 1959fa9d9a7SFabian Mora ".omp_offloading.descriptor_unreg" + Suffix, &M); 1969fa9d9a7SFabian Mora Func->setSection(".text.startup"); 1979fa9d9a7SFabian Mora 1989fa9d9a7SFabian Mora // Get __tgt_unregister_lib function declaration. 1999fa9d9a7SFabian Mora auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), 2009fa9d9a7SFabian Mora /*isVarArg*/ false); 2019fa9d9a7SFabian Mora FunctionCallee UnRegFuncC = 2029fa9d9a7SFabian Mora M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy); 2039fa9d9a7SFabian Mora 2049fa9d9a7SFabian Mora // Construct function body 2059fa9d9a7SFabian Mora IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); 2069fa9d9a7SFabian Mora Builder.CreateCall(UnRegFuncC, BinDesc); 2079fa9d9a7SFabian Mora Builder.CreateRetVoid(); 2089fa9d9a7SFabian Mora 209421085fdSJoseph Huber return Func; 210421085fdSJoseph Huber } 211421085fdSJoseph Huber 212421085fdSJoseph Huber void createRegisterFunction(Module &M, GlobalVariable *BinDesc, 213421085fdSJoseph Huber StringRef Suffix) { 214421085fdSJoseph Huber LLVMContext &C = M.getContext(); 215421085fdSJoseph Huber auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 216421085fdSJoseph Huber auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, 217421085fdSJoseph Huber ".omp_offloading.descriptor_reg" + Suffix, &M); 218421085fdSJoseph Huber Func->setSection(".text.startup"); 219421085fdSJoseph Huber 220421085fdSJoseph Huber // Get __tgt_register_lib function declaration. 221421085fdSJoseph Huber auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), 222421085fdSJoseph Huber /*isVarArg*/ false); 223421085fdSJoseph Huber FunctionCallee RegFuncC = 224421085fdSJoseph Huber M.getOrInsertFunction("__tgt_register_lib", RegFuncTy); 225421085fdSJoseph Huber 226421085fdSJoseph Huber auto *AtExitTy = FunctionType::get( 227421085fdSJoseph Huber Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false); 228421085fdSJoseph Huber FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy); 229421085fdSJoseph Huber 230421085fdSJoseph Huber Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix); 231421085fdSJoseph Huber 232421085fdSJoseph Huber // Construct function body 233421085fdSJoseph Huber IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); 234421085fdSJoseph Huber 235fa9e90f5SJoseph Huber Builder.CreateCall(RegFuncC, BinDesc); 236fa9e90f5SJoseph Huber 237421085fdSJoseph Huber // Register the destructors with 'atexit'. This is expected by the CUDA 238421085fdSJoseph Huber // runtime and ensures that we clean up before dynamic objects are destroyed. 239fa9e90f5SJoseph Huber // This needs to be done after plugin initialization to ensure that it is 240fa9e90f5SJoseph Huber // called before the plugin runtime is destroyed. 241421085fdSJoseph Huber Builder.CreateCall(AtExit, UnregFunc); 242421085fdSJoseph Huber Builder.CreateRetVoid(); 243421085fdSJoseph Huber 244421085fdSJoseph Huber // Add this function to constructors. 245421085fdSJoseph Huber appendToGlobalCtors(M, Func, /*Priority=*/101); 2469fa9d9a7SFabian Mora } 2479fa9d9a7SFabian Mora 2489fa9d9a7SFabian Mora // struct fatbin_wrapper { 2499fa9d9a7SFabian Mora // int32_t magic; 2509fa9d9a7SFabian Mora // int32_t version; 2519fa9d9a7SFabian Mora // void *image; 2529fa9d9a7SFabian Mora // void *reserved; 2539fa9d9a7SFabian Mora //}; 2549fa9d9a7SFabian Mora StructType *getFatbinWrapperTy(Module &M) { 2559fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 2569fa9d9a7SFabian Mora StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper"); 2579fa9d9a7SFabian Mora if (!FatbinTy) 2589fa9d9a7SFabian Mora FatbinTy = StructType::create( 2599fa9d9a7SFabian Mora "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C), 2609fa9d9a7SFabian Mora PointerType::getUnqual(C), PointerType::getUnqual(C)); 2619fa9d9a7SFabian Mora return FatbinTy; 2629fa9d9a7SFabian Mora } 2639fa9d9a7SFabian Mora 2649fa9d9a7SFabian Mora /// Embed the image \p Image into the module \p M so it can be found by the 2659fa9d9a7SFabian Mora /// runtime. 2669fa9d9a7SFabian Mora GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP, 2679fa9d9a7SFabian Mora StringRef Suffix) { 2689fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 2699fa9d9a7SFabian Mora llvm::Type *Int8PtrTy = PointerType::getUnqual(C); 2709fa9d9a7SFabian Mora llvm::Triple Triple = llvm::Triple(M.getTargetTriple()); 2719fa9d9a7SFabian Mora 2729fa9d9a7SFabian Mora // Create the global string containing the fatbinary. 2739fa9d9a7SFabian Mora StringRef FatbinConstantSection = 2749fa9d9a7SFabian Mora IsHIP ? ".hip_fatbin" 2759fa9d9a7SFabian Mora : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"); 2769fa9d9a7SFabian Mora auto *Data = ConstantDataArray::get(C, Image); 2779fa9d9a7SFabian Mora auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true, 2789fa9d9a7SFabian Mora GlobalVariable::InternalLinkage, Data, 2799fa9d9a7SFabian Mora ".fatbin_image" + Suffix); 2809fa9d9a7SFabian Mora Fatbin->setSection(FatbinConstantSection); 2819fa9d9a7SFabian Mora 2829fa9d9a7SFabian Mora // Create the fatbinary wrapper 2839fa9d9a7SFabian Mora StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment" 2849fa9d9a7SFabian Mora : Triple.isMacOSX() ? "__NV_CUDA,__fatbin" 2859fa9d9a7SFabian Mora : ".nvFatBinSegment"; 2869fa9d9a7SFabian Mora Constant *FatbinWrapper[] = { 2879fa9d9a7SFabian Mora ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic), 2889fa9d9a7SFabian Mora ConstantInt::get(Type::getInt32Ty(C), 1), 2899fa9d9a7SFabian Mora ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy), 2909fa9d9a7SFabian Mora ConstantPointerNull::get(PointerType::getUnqual(C))}; 2919fa9d9a7SFabian Mora 2929fa9d9a7SFabian Mora Constant *FatbinInitializer = 2939fa9d9a7SFabian Mora ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper); 2949fa9d9a7SFabian Mora 2959fa9d9a7SFabian Mora auto *FatbinDesc = 2969fa9d9a7SFabian Mora new GlobalVariable(M, getFatbinWrapperTy(M), 2979fa9d9a7SFabian Mora /*isConstant*/ true, GlobalValue::InternalLinkage, 2989fa9d9a7SFabian Mora FatbinInitializer, ".fatbin_wrapper" + Suffix); 2999fa9d9a7SFabian Mora FatbinDesc->setSection(FatbinWrapperSection); 3009fa9d9a7SFabian Mora FatbinDesc->setAlignment(Align(8)); 3019fa9d9a7SFabian Mora 3029fa9d9a7SFabian Mora return FatbinDesc; 3039fa9d9a7SFabian Mora } 3049fa9d9a7SFabian Mora 3059fa9d9a7SFabian Mora /// Create the register globals function. We will iterate all of the offloading 3069fa9d9a7SFabian Mora /// entries stored at the begin / end symbols and register them according to 3079fa9d9a7SFabian Mora /// their type. This creates the following function in IR: 3089fa9d9a7SFabian Mora /// 3099fa9d9a7SFabian Mora /// extern struct __tgt_offload_entry __start_cuda_offloading_entries; 3109fa9d9a7SFabian Mora /// extern struct __tgt_offload_entry __stop_cuda_offloading_entries; 3119fa9d9a7SFabian Mora /// 3129fa9d9a7SFabian Mora /// extern void __cudaRegisterFunction(void **, void *, void *, void *, int, 3139fa9d9a7SFabian Mora /// void *, void *, void *, void *, int *); 3149fa9d9a7SFabian Mora /// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t, 3159fa9d9a7SFabian Mora /// int64_t, int32_t, int32_t); 3169fa9d9a7SFabian Mora /// 3179fa9d9a7SFabian Mora /// void __cudaRegisterTest(void **fatbinHandle) { 3189fa9d9a7SFabian Mora /// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries; 3199fa9d9a7SFabian Mora /// entry != &__stop_cuda_offloading_entries; ++entry) { 320*13dcc95dSJoseph Huber /// if (entry->Kind != OFK_CUDA) 321*13dcc95dSJoseph Huber /// continue 322*13dcc95dSJoseph Huber /// 323*13dcc95dSJoseph Huber /// if (!entry->Size) 3249fa9d9a7SFabian Mora /// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name, 3259fa9d9a7SFabian Mora /// entry->name, -1, 0, 0, 0, 0, 0); 3269fa9d9a7SFabian Mora /// else 3279fa9d9a7SFabian Mora /// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name, 3289fa9d9a7SFabian Mora /// 0, entry->size, 0, 0); 3299fa9d9a7SFabian Mora /// } 3309fa9d9a7SFabian Mora /// } 3319fa9d9a7SFabian Mora Function *createRegisterGlobalsFunction(Module &M, bool IsHIP, 3329fa9d9a7SFabian Mora EntryArrayTy EntryArray, 3339fa9d9a7SFabian Mora StringRef Suffix, 3349fa9d9a7SFabian Mora bool EmitSurfacesAndTextures) { 3359fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 3369fa9d9a7SFabian Mora auto [EntriesB, EntriesE] = EntryArray; 3379fa9d9a7SFabian Mora 3389fa9d9a7SFabian Mora // Get the __cudaRegisterFunction function declaration. 3399fa9d9a7SFabian Mora PointerType *Int8PtrTy = PointerType::get(C, 0); 3409fa9d9a7SFabian Mora PointerType *Int8PtrPtrTy = PointerType::get(C, 0); 3419fa9d9a7SFabian Mora PointerType *Int32PtrTy = PointerType::get(C, 0); 3429fa9d9a7SFabian Mora auto *RegFuncTy = FunctionType::get( 3439fa9d9a7SFabian Mora Type::getInt32Ty(C), 3449fa9d9a7SFabian Mora {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C), 3459fa9d9a7SFabian Mora Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy}, 3469fa9d9a7SFabian Mora /*isVarArg*/ false); 3479fa9d9a7SFabian Mora FunctionCallee RegFunc = M.getOrInsertFunction( 3489fa9d9a7SFabian Mora IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy); 3499fa9d9a7SFabian Mora 3509fa9d9a7SFabian Mora // Get the __cudaRegisterVar function declaration. 3519fa9d9a7SFabian Mora auto *RegVarTy = FunctionType::get( 3529fa9d9a7SFabian Mora Type::getVoidTy(C), 3539fa9d9a7SFabian Mora {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C), 3549fa9d9a7SFabian Mora getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)}, 3559fa9d9a7SFabian Mora /*isVarArg*/ false); 3569fa9d9a7SFabian Mora FunctionCallee RegVar = M.getOrInsertFunction( 3579fa9d9a7SFabian Mora IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy); 3589fa9d9a7SFabian Mora 3599fa9d9a7SFabian Mora // Get the __cudaRegisterSurface function declaration. 36070a16b90SJoseph Huber FunctionType *RegManagedVarTy = 36170a16b90SJoseph Huber FunctionType::get(Type::getVoidTy(C), 36270a16b90SJoseph Huber {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, 36370a16b90SJoseph Huber getSizeTTy(M), Type::getInt32Ty(C)}, 36470a16b90SJoseph Huber /*isVarArg=*/false); 36570a16b90SJoseph Huber FunctionCallee RegManagedVar = M.getOrInsertFunction( 36670a16b90SJoseph Huber IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar", 36770a16b90SJoseph Huber RegManagedVarTy); 36870a16b90SJoseph Huber 36970a16b90SJoseph Huber // Get the __cudaRegisterSurface function declaration. 3709fa9d9a7SFabian Mora FunctionType *RegSurfaceTy = 3719fa9d9a7SFabian Mora FunctionType::get(Type::getVoidTy(C), 3729fa9d9a7SFabian Mora {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, 3739fa9d9a7SFabian Mora Type::getInt32Ty(C), Type::getInt32Ty(C)}, 3749fa9d9a7SFabian Mora /*isVarArg=*/false); 3759fa9d9a7SFabian Mora FunctionCallee RegSurface = M.getOrInsertFunction( 3769fa9d9a7SFabian Mora IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy); 3779fa9d9a7SFabian Mora 3789fa9d9a7SFabian Mora // Get the __cudaRegisterTexture function declaration. 3799fa9d9a7SFabian Mora FunctionType *RegTextureTy = FunctionType::get( 3809fa9d9a7SFabian Mora Type::getVoidTy(C), 3819fa9d9a7SFabian Mora {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C), 3829fa9d9a7SFabian Mora Type::getInt32Ty(C), Type::getInt32Ty(C)}, 3839fa9d9a7SFabian Mora /*isVarArg=*/false); 3849fa9d9a7SFabian Mora FunctionCallee RegTexture = M.getOrInsertFunction( 3859fa9d9a7SFabian Mora IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy); 3869fa9d9a7SFabian Mora 3879fa9d9a7SFabian Mora auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy, 3889fa9d9a7SFabian Mora /*isVarArg*/ false); 3899fa9d9a7SFabian Mora auto *RegGlobalsFn = 3909fa9d9a7SFabian Mora Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage, 3919fa9d9a7SFabian Mora IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M); 3929fa9d9a7SFabian Mora RegGlobalsFn->setSection(".text.startup"); 3939fa9d9a7SFabian Mora 3949fa9d9a7SFabian Mora // Create the loop to register all the entries. 3959fa9d9a7SFabian Mora IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn)); 3969fa9d9a7SFabian Mora auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn); 397*13dcc95dSJoseph Huber auto *IfKindBB = BasicBlock::Create(C, "if.kind", RegGlobalsFn); 3989fa9d9a7SFabian Mora auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn); 3999fa9d9a7SFabian Mora auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn); 4009fa9d9a7SFabian Mora auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn); 4019fa9d9a7SFabian Mora auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn); 4029fa9d9a7SFabian Mora auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn); 4039fa9d9a7SFabian Mora auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn); 4049fa9d9a7SFabian Mora auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn); 4059fa9d9a7SFabian Mora auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn); 4069fa9d9a7SFabian Mora 4079fa9d9a7SFabian Mora auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE); 4089fa9d9a7SFabian Mora Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB); 4099fa9d9a7SFabian Mora Builder.SetInsertPoint(EntryBB); 4109fa9d9a7SFabian Mora auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry"); 4119fa9d9a7SFabian Mora auto *AddrPtr = 4129fa9d9a7SFabian Mora Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 413*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 414*13dcc95dSJoseph Huber ConstantInt::get(Type::getInt32Ty(C), 4)}); 4159fa9d9a7SFabian Mora auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr"); 416*13dcc95dSJoseph Huber auto *AuxAddrPtr = 417*13dcc95dSJoseph Huber Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 418*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 419*13dcc95dSJoseph Huber ConstantInt::get(Type::getInt32Ty(C), 8)}); 420*13dcc95dSJoseph Huber auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr, "aux_addr"); 421*13dcc95dSJoseph Huber auto *KindPtr = 422*13dcc95dSJoseph Huber Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 423*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 424*13dcc95dSJoseph Huber ConstantInt::get(Type::getInt32Ty(C), 2)}); 425*13dcc95dSJoseph Huber auto *Kind = Builder.CreateLoad(Type::getInt16Ty(C), KindPtr, "kind"); 4269fa9d9a7SFabian Mora auto *NamePtr = 4279fa9d9a7SFabian Mora Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 428*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 429*13dcc95dSJoseph Huber ConstantInt::get(Type::getInt32Ty(C), 5)}); 4309fa9d9a7SFabian Mora auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name"); 4319fa9d9a7SFabian Mora auto *SizePtr = 4329fa9d9a7SFabian Mora Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 433*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 434*13dcc95dSJoseph Huber ConstantInt::get(Type::getInt32Ty(C), 6)}); 435*13dcc95dSJoseph Huber auto *Size = Builder.CreateLoad(Type::getInt64Ty(C), SizePtr, "size"); 4369fa9d9a7SFabian Mora auto *FlagsPtr = 4379fa9d9a7SFabian Mora Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 438*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 4399fa9d9a7SFabian Mora ConstantInt::get(Type::getInt32Ty(C), 3)}); 4409fa9d9a7SFabian Mora auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags"); 4419fa9d9a7SFabian Mora auto *DataPtr = 4429fa9d9a7SFabian Mora Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry, 443*13dcc95dSJoseph Huber {ConstantInt::get(Type::getInt32Ty(C), 0), 444*13dcc95dSJoseph Huber ConstantInt::get(Type::getInt32Ty(C), 7)}); 445*13dcc95dSJoseph Huber auto *Data = Builder.CreateTrunc( 446*13dcc95dSJoseph Huber Builder.CreateLoad(Type::getInt64Ty(C), DataPtr, "data"), 447*13dcc95dSJoseph Huber Type::getInt32Ty(C)); 448*13dcc95dSJoseph Huber auto *Type = Builder.CreateAnd( 4499fa9d9a7SFabian Mora Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type"); 4509fa9d9a7SFabian Mora 4519fa9d9a7SFabian Mora // Extract the flags stored in the bit-field and convert them to C booleans. 4529fa9d9a7SFabian Mora auto *ExternBit = Builder.CreateAnd( 4539fa9d9a7SFabian Mora Flags, ConstantInt::get(Type::getInt32Ty(C), 4549fa9d9a7SFabian Mora llvm::offloading::OffloadGlobalExtern)); 4559fa9d9a7SFabian Mora auto *Extern = Builder.CreateLShr( 4569fa9d9a7SFabian Mora ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern"); 4579fa9d9a7SFabian Mora auto *ConstantBit = Builder.CreateAnd( 4589fa9d9a7SFabian Mora Flags, ConstantInt::get(Type::getInt32Ty(C), 4599fa9d9a7SFabian Mora llvm::offloading::OffloadGlobalConstant)); 4609fa9d9a7SFabian Mora auto *Const = Builder.CreateLShr( 4619fa9d9a7SFabian Mora ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant"); 4629fa9d9a7SFabian Mora auto *NormalizedBit = Builder.CreateAnd( 4639fa9d9a7SFabian Mora Flags, ConstantInt::get(Type::getInt32Ty(C), 4649fa9d9a7SFabian Mora llvm::offloading::OffloadGlobalNormalized)); 4659fa9d9a7SFabian Mora auto *Normalized = Builder.CreateLShr( 4669fa9d9a7SFabian Mora NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized"); 467*13dcc95dSJoseph Huber auto *KindCond = Builder.CreateICmpEQ( 468*13dcc95dSJoseph Huber Kind, ConstantInt::get(Type::getInt16Ty(C), 469*13dcc95dSJoseph Huber IsHIP ? object::OffloadKind::OFK_HIP 470*13dcc95dSJoseph Huber : object::OffloadKind::OFK_Cuda)); 471*13dcc95dSJoseph Huber Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB); 472*13dcc95dSJoseph Huber Builder.SetInsertPoint(IfKindBB); 473*13dcc95dSJoseph Huber auto *FnCond = Builder.CreateICmpEQ( 474*13dcc95dSJoseph Huber Size, ConstantInt::getNullValue(Type::getInt64Ty(C))); 4759fa9d9a7SFabian Mora Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB); 4769fa9d9a7SFabian Mora 4779fa9d9a7SFabian Mora // Create kernel registration code. 4789fa9d9a7SFabian Mora Builder.SetInsertPoint(IfThenBB); 4799fa9d9a7SFabian Mora Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name, 4809fa9d9a7SFabian Mora ConstantInt::get(Type::getInt32Ty(C), -1), 4819fa9d9a7SFabian Mora ConstantPointerNull::get(Int8PtrTy), 4829fa9d9a7SFabian Mora ConstantPointerNull::get(Int8PtrTy), 4839fa9d9a7SFabian Mora ConstantPointerNull::get(Int8PtrTy), 4849fa9d9a7SFabian Mora ConstantPointerNull::get(Int8PtrTy), 4859fa9d9a7SFabian Mora ConstantPointerNull::get(Int32PtrTy)}); 4869fa9d9a7SFabian Mora Builder.CreateBr(IfEndBB); 4879fa9d9a7SFabian Mora Builder.SetInsertPoint(IfElseBB); 4889fa9d9a7SFabian Mora 489*13dcc95dSJoseph Huber auto *Switch = Builder.CreateSwitch(Type, IfEndBB); 4909fa9d9a7SFabian Mora // Create global variable registration code. 4919fa9d9a7SFabian Mora Builder.SetInsertPoint(SwGlobalBB); 4929fa9d9a7SFabian Mora Builder.CreateCall(RegVar, 4939fa9d9a7SFabian Mora {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size, 4949fa9d9a7SFabian Mora Const, ConstantInt::get(Type::getInt32Ty(C), 0)}); 4959fa9d9a7SFabian Mora Builder.CreateBr(IfEndBB); 4969fa9d9a7SFabian Mora Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry), 4979fa9d9a7SFabian Mora SwGlobalBB); 4989fa9d9a7SFabian Mora 4999fa9d9a7SFabian Mora // Create managed variable registration code. 5009fa9d9a7SFabian Mora Builder.SetInsertPoint(SwManagedBB); 501*13dcc95dSJoseph Huber Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr, 502*13dcc95dSJoseph Huber Name, Size, Data}); 5039fa9d9a7SFabian Mora Builder.CreateBr(IfEndBB); 5049fa9d9a7SFabian Mora Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry), 5059fa9d9a7SFabian Mora SwManagedBB); 5069fa9d9a7SFabian Mora // Create surface variable registration code. 5079fa9d9a7SFabian Mora Builder.SetInsertPoint(SwSurfaceBB); 5089fa9d9a7SFabian Mora if (EmitSurfacesAndTextures) 5099fa9d9a7SFabian Mora Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name, 5109fa9d9a7SFabian Mora Data, Extern}); 5119fa9d9a7SFabian Mora Builder.CreateBr(IfEndBB); 5129fa9d9a7SFabian Mora Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry), 5139fa9d9a7SFabian Mora SwSurfaceBB); 5149fa9d9a7SFabian Mora 5159fa9d9a7SFabian Mora // Create texture variable registration code. 5169fa9d9a7SFabian Mora Builder.SetInsertPoint(SwTextureBB); 5179fa9d9a7SFabian Mora if (EmitSurfacesAndTextures) 5189fa9d9a7SFabian Mora Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name, 5199fa9d9a7SFabian Mora Data, Normalized, Extern}); 5209fa9d9a7SFabian Mora Builder.CreateBr(IfEndBB); 5219fa9d9a7SFabian Mora Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry), 5229fa9d9a7SFabian Mora SwTextureBB); 5239fa9d9a7SFabian Mora 5249fa9d9a7SFabian Mora Builder.SetInsertPoint(IfEndBB); 5259fa9d9a7SFabian Mora auto *NewEntry = Builder.CreateInBoundsGEP( 5269fa9d9a7SFabian Mora offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1)); 5279fa9d9a7SFabian Mora auto *Cmp = Builder.CreateICmpEQ( 5289fa9d9a7SFabian Mora NewEntry, 5299fa9d9a7SFabian Mora ConstantExpr::getInBoundsGetElementPtr( 5309fa9d9a7SFabian Mora ArrayType::get(offloading::getEntryTy(M), 0), EntriesE, 5319fa9d9a7SFabian Mora ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0), 5329fa9d9a7SFabian Mora ConstantInt::get(getSizeTTy(M), 0)}))); 5339fa9d9a7SFabian Mora Entry->addIncoming( 5349fa9d9a7SFabian Mora ConstantExpr::getInBoundsGetElementPtr( 5359fa9d9a7SFabian Mora ArrayType::get(offloading::getEntryTy(M), 0), EntriesB, 5369fa9d9a7SFabian Mora ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0), 5379fa9d9a7SFabian Mora ConstantInt::get(getSizeTTy(M), 0)})), 5389fa9d9a7SFabian Mora &RegGlobalsFn->getEntryBlock()); 5399fa9d9a7SFabian Mora Entry->addIncoming(NewEntry, IfEndBB); 5409fa9d9a7SFabian Mora Builder.CreateCondBr(Cmp, ExitBB, EntryBB); 5419fa9d9a7SFabian Mora Builder.SetInsertPoint(ExitBB); 5429fa9d9a7SFabian Mora Builder.CreateRetVoid(); 5439fa9d9a7SFabian Mora 5449fa9d9a7SFabian Mora return RegGlobalsFn; 5459fa9d9a7SFabian Mora } 5469fa9d9a7SFabian Mora 5479fa9d9a7SFabian Mora // Create the constructor and destructor to register the fatbinary with the CUDA 5489fa9d9a7SFabian Mora // runtime. 5499fa9d9a7SFabian Mora void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc, 5509fa9d9a7SFabian Mora bool IsHIP, EntryArrayTy EntryArray, 5519fa9d9a7SFabian Mora StringRef Suffix, 5529fa9d9a7SFabian Mora bool EmitSurfacesAndTextures) { 5539fa9d9a7SFabian Mora LLVMContext &C = M.getContext(); 5549fa9d9a7SFabian Mora auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 5559fa9d9a7SFabian Mora auto *CtorFunc = Function::Create( 5569fa9d9a7SFabian Mora CtorFuncTy, GlobalValue::InternalLinkage, 5579fa9d9a7SFabian Mora (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M); 5589fa9d9a7SFabian Mora CtorFunc->setSection(".text.startup"); 5599fa9d9a7SFabian Mora 5609fa9d9a7SFabian Mora auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 5619fa9d9a7SFabian Mora auto *DtorFunc = Function::Create( 5629fa9d9a7SFabian Mora DtorFuncTy, GlobalValue::InternalLinkage, 5639fa9d9a7SFabian Mora (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M); 5649fa9d9a7SFabian Mora DtorFunc->setSection(".text.startup"); 5659fa9d9a7SFabian Mora 5669fa9d9a7SFabian Mora auto *PtrTy = PointerType::getUnqual(C); 5679fa9d9a7SFabian Mora 5689fa9d9a7SFabian Mora // Get the __cudaRegisterFatBinary function declaration. 5699fa9d9a7SFabian Mora auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false); 5709fa9d9a7SFabian Mora FunctionCallee RegFatbin = M.getOrInsertFunction( 5719fa9d9a7SFabian Mora IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy); 5729fa9d9a7SFabian Mora // Get the __cudaRegisterFatBinaryEnd function declaration. 5739fa9d9a7SFabian Mora auto *RegFatEndTy = 5749fa9d9a7SFabian Mora FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false); 5759fa9d9a7SFabian Mora FunctionCallee RegFatbinEnd = 5769fa9d9a7SFabian Mora M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy); 5779fa9d9a7SFabian Mora // Get the __cudaUnregisterFatBinary function declaration. 5789fa9d9a7SFabian Mora auto *UnregFatTy = 5799fa9d9a7SFabian Mora FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false); 5809fa9d9a7SFabian Mora FunctionCallee UnregFatbin = M.getOrInsertFunction( 5819fa9d9a7SFabian Mora IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary", 5829fa9d9a7SFabian Mora UnregFatTy); 5839fa9d9a7SFabian Mora 5849fa9d9a7SFabian Mora auto *AtExitTy = 5859fa9d9a7SFabian Mora FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false); 5869fa9d9a7SFabian Mora FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy); 5879fa9d9a7SFabian Mora 5889fa9d9a7SFabian Mora auto *BinaryHandleGlobal = new llvm::GlobalVariable( 5899fa9d9a7SFabian Mora M, PtrTy, false, llvm::GlobalValue::InternalLinkage, 5909fa9d9a7SFabian Mora llvm::ConstantPointerNull::get(PtrTy), 5919fa9d9a7SFabian Mora (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix); 5929fa9d9a7SFabian Mora 5939fa9d9a7SFabian Mora // Create the constructor to register this image with the runtime. 5949fa9d9a7SFabian Mora IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc)); 5959fa9d9a7SFabian Mora CallInst *Handle = CtorBuilder.CreateCall( 5969fa9d9a7SFabian Mora RegFatbin, 5979fa9d9a7SFabian Mora ConstantExpr::getPointerBitCastOrAddrSpaceCast(FatbinDesc, PtrTy)); 5989fa9d9a7SFabian Mora CtorBuilder.CreateAlignedStore( 5999fa9d9a7SFabian Mora Handle, BinaryHandleGlobal, 6009fa9d9a7SFabian Mora Align(M.getDataLayout().getPointerTypeSize(PtrTy))); 6019fa9d9a7SFabian Mora CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray, 6029fa9d9a7SFabian Mora Suffix, 6039fa9d9a7SFabian Mora EmitSurfacesAndTextures), 6049fa9d9a7SFabian Mora Handle); 6059fa9d9a7SFabian Mora if (!IsHIP) 6069fa9d9a7SFabian Mora CtorBuilder.CreateCall(RegFatbinEnd, Handle); 6079fa9d9a7SFabian Mora CtorBuilder.CreateCall(AtExit, DtorFunc); 6089fa9d9a7SFabian Mora CtorBuilder.CreateRetVoid(); 6099fa9d9a7SFabian Mora 6109fa9d9a7SFabian Mora // Create the destructor to unregister the image with the runtime. We cannot 6119fa9d9a7SFabian Mora // use a standard global destructor after CUDA 9.2 so this must be called by 612d6fc7d3aSJay Foad // `atexit()` instead. 6139fa9d9a7SFabian Mora IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc)); 6149fa9d9a7SFabian Mora LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad( 6159fa9d9a7SFabian Mora PtrTy, BinaryHandleGlobal, 6169fa9d9a7SFabian Mora Align(M.getDataLayout().getPointerTypeSize(PtrTy))); 6179fa9d9a7SFabian Mora DtorBuilder.CreateCall(UnregFatbin, BinaryHandle); 6189fa9d9a7SFabian Mora DtorBuilder.CreateRetVoid(); 6199fa9d9a7SFabian Mora 6209fa9d9a7SFabian Mora // Add this function to constructors. 621421085fdSJoseph Huber appendToGlobalCtors(M, CtorFunc, /*Priority=*/101); 6229fa9d9a7SFabian Mora } 6239fa9d9a7SFabian Mora } // namespace 6249fa9d9a7SFabian Mora 6259fa9d9a7SFabian Mora Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images, 6269fa9d9a7SFabian Mora EntryArrayTy EntryArray, 6275c840542SJoseph Huber llvm::StringRef Suffix, bool Relocatable) { 6285c840542SJoseph Huber GlobalVariable *Desc = 6295c840542SJoseph Huber createBinDesc(M, Images, EntryArray, Suffix, Relocatable); 6309fa9d9a7SFabian Mora if (!Desc) 6319fa9d9a7SFabian Mora return createStringError(inconvertibleErrorCode(), 6329fa9d9a7SFabian Mora "No binary descriptors created."); 6339fa9d9a7SFabian Mora createRegisterFunction(M, Desc, Suffix); 6349fa9d9a7SFabian Mora return Error::success(); 6359fa9d9a7SFabian Mora } 6369fa9d9a7SFabian Mora 6379fa9d9a7SFabian Mora Error offloading::wrapCudaBinary(Module &M, ArrayRef<char> Image, 6389fa9d9a7SFabian Mora EntryArrayTy EntryArray, 6399fa9d9a7SFabian Mora llvm::StringRef Suffix, 6409fa9d9a7SFabian Mora bool EmitSurfacesAndTextures) { 6419fa9d9a7SFabian Mora GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix); 6429fa9d9a7SFabian Mora if (!Desc) 6439fa9d9a7SFabian Mora return createStringError(inconvertibleErrorCode(), 6449fa9d9a7SFabian Mora "No fatbin section created."); 6459fa9d9a7SFabian Mora 6469fa9d9a7SFabian Mora createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix, 6479fa9d9a7SFabian Mora EmitSurfacesAndTextures); 6489fa9d9a7SFabian Mora return Error::success(); 6499fa9d9a7SFabian Mora } 6509fa9d9a7SFabian Mora 6519fa9d9a7SFabian Mora Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image, 6529fa9d9a7SFabian Mora EntryArrayTy EntryArray, llvm::StringRef Suffix, 6539fa9d9a7SFabian Mora bool EmitSurfacesAndTextures) { 6549fa9d9a7SFabian Mora GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix); 6559fa9d9a7SFabian Mora if (!Desc) 6569fa9d9a7SFabian Mora return createStringError(inconvertibleErrorCode(), 6579fa9d9a7SFabian Mora "No fatbin section created."); 6589fa9d9a7SFabian Mora 6599fa9d9a7SFabian Mora createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix, 6609fa9d9a7SFabian Mora EmitSurfacesAndTextures); 6619fa9d9a7SFabian Mora return Error::success(); 6629fa9d9a7SFabian Mora } 663