17a6dacacSDimitry Andric //===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===// 27a6dacacSDimitry Andric // 37a6dacacSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 47a6dacacSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 57a6dacacSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67a6dacacSDimitry Andric // 77a6dacacSDimitry Andric //===----------------------------------------------------------------------===// 87a6dacacSDimitry Andric 97a6dacacSDimitry Andric #include "llvm/Frontend/Offloading/OffloadWrapper.h" 107a6dacacSDimitry Andric #include "llvm/ADT/ArrayRef.h" 117a6dacacSDimitry Andric #include "llvm/BinaryFormat/Magic.h" 127a6dacacSDimitry Andric #include "llvm/Frontend/Offloading/Utility.h" 137a6dacacSDimitry Andric #include "llvm/IR/Constants.h" 147a6dacacSDimitry Andric #include "llvm/IR/GlobalVariable.h" 157a6dacacSDimitry Andric #include "llvm/IR/IRBuilder.h" 167a6dacacSDimitry Andric #include "llvm/IR/LLVMContext.h" 177a6dacacSDimitry Andric #include "llvm/IR/Module.h" 187a6dacacSDimitry Andric #include "llvm/Object/OffloadBinary.h" 197a6dacacSDimitry Andric #include "llvm/Support/Error.h" 207a6dacacSDimitry Andric #include "llvm/TargetParser/Triple.h" 217a6dacacSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h" 227a6dacacSDimitry Andric 237a6dacacSDimitry Andric using namespace llvm; 247a6dacacSDimitry Andric using namespace llvm::offloading; 257a6dacacSDimitry Andric 267a6dacacSDimitry Andric namespace { 277a6dacacSDimitry Andric /// Magic number that begins the section containing the CUDA fatbinary. 
287a6dacacSDimitry Andric constexpr unsigned CudaFatMagic = 0x466243b1; 297a6dacacSDimitry Andric constexpr unsigned HIPFatMagic = 0x48495046; 307a6dacacSDimitry Andric 317a6dacacSDimitry Andric IntegerType *getSizeTTy(Module &M) { 327a6dacacSDimitry Andric return M.getDataLayout().getIntPtrType(M.getContext()); 337a6dacacSDimitry Andric } 347a6dacacSDimitry Andric 357a6dacacSDimitry Andric // struct __tgt_device_image { 367a6dacacSDimitry Andric // void *ImageStart; 377a6dacacSDimitry Andric // void *ImageEnd; 387a6dacacSDimitry Andric // __tgt_offload_entry *EntriesBegin; 397a6dacacSDimitry Andric // __tgt_offload_entry *EntriesEnd; 407a6dacacSDimitry Andric // }; 417a6dacacSDimitry Andric StructType *getDeviceImageTy(Module &M) { 427a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 437a6dacacSDimitry Andric StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image"); 447a6dacacSDimitry Andric if (!ImageTy) 457a6dacacSDimitry Andric ImageTy = 467a6dacacSDimitry Andric StructType::create("__tgt_device_image", PointerType::getUnqual(C), 477a6dacacSDimitry Andric PointerType::getUnqual(C), PointerType::getUnqual(C), 487a6dacacSDimitry Andric PointerType::getUnqual(C)); 497a6dacacSDimitry Andric return ImageTy; 507a6dacacSDimitry Andric } 517a6dacacSDimitry Andric 527a6dacacSDimitry Andric PointerType *getDeviceImagePtrTy(Module &M) { 537a6dacacSDimitry Andric return PointerType::getUnqual(getDeviceImageTy(M)); 547a6dacacSDimitry Andric } 557a6dacacSDimitry Andric 567a6dacacSDimitry Andric // struct __tgt_bin_desc { 577a6dacacSDimitry Andric // int32_t NumDeviceImages; 587a6dacacSDimitry Andric // __tgt_device_image *DeviceImages; 597a6dacacSDimitry Andric // __tgt_offload_entry *HostEntriesBegin; 607a6dacacSDimitry Andric // __tgt_offload_entry *HostEntriesEnd; 617a6dacacSDimitry Andric // }; 627a6dacacSDimitry Andric StructType *getBinDescTy(Module &M) { 637a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 647a6dacacSDimitry Andric 
StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc"); 657a6dacacSDimitry Andric if (!DescTy) 667a6dacacSDimitry Andric DescTy = StructType::create( 677a6dacacSDimitry Andric "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M), 687a6dacacSDimitry Andric PointerType::getUnqual(C), PointerType::getUnqual(C)); 697a6dacacSDimitry Andric return DescTy; 707a6dacacSDimitry Andric } 717a6dacacSDimitry Andric 727a6dacacSDimitry Andric PointerType *getBinDescPtrTy(Module &M) { 737a6dacacSDimitry Andric return PointerType::getUnqual(getBinDescTy(M)); 747a6dacacSDimitry Andric } 757a6dacacSDimitry Andric 767a6dacacSDimitry Andric /// Creates binary descriptor for the given device images. Binary descriptor 777a6dacacSDimitry Andric /// is an object that is passed to the offloading runtime at program startup 787a6dacacSDimitry Andric /// and it describes all device images available in the executable or shared 797a6dacacSDimitry Andric /// library. It is defined as follows 807a6dacacSDimitry Andric /// 817a6dacacSDimitry Andric /// __attribute__((visibility("hidden"))) 827a6dacacSDimitry Andric /// extern __tgt_offload_entry *__start_omp_offloading_entries; 837a6dacacSDimitry Andric /// __attribute__((visibility("hidden"))) 847a6dacacSDimitry Andric /// extern __tgt_offload_entry *__stop_omp_offloading_entries; 857a6dacacSDimitry Andric /// 867a6dacacSDimitry Andric /// static const char Image0[] = { <Bufs.front() contents> }; 877a6dacacSDimitry Andric /// ... 
887a6dacacSDimitry Andric /// static const char ImageN[] = { <Bufs.back() contents> }; 897a6dacacSDimitry Andric /// 907a6dacacSDimitry Andric /// static const __tgt_device_image Images[] = { 917a6dacacSDimitry Andric /// { 927a6dacacSDimitry Andric /// Image0, /*ImageStart*/ 937a6dacacSDimitry Andric /// Image0 + sizeof(Image0), /*ImageEnd*/ 947a6dacacSDimitry Andric /// __start_omp_offloading_entries, /*EntriesBegin*/ 957a6dacacSDimitry Andric /// __stop_omp_offloading_entries /*EntriesEnd*/ 967a6dacacSDimitry Andric /// }, 977a6dacacSDimitry Andric /// ... 987a6dacacSDimitry Andric /// { 997a6dacacSDimitry Andric /// ImageN, /*ImageStart*/ 1007a6dacacSDimitry Andric /// ImageN + sizeof(ImageN), /*ImageEnd*/ 1017a6dacacSDimitry Andric /// __start_omp_offloading_entries, /*EntriesBegin*/ 1027a6dacacSDimitry Andric /// __stop_omp_offloading_entries /*EntriesEnd*/ 1037a6dacacSDimitry Andric /// } 1047a6dacacSDimitry Andric /// }; 1057a6dacacSDimitry Andric /// 1067a6dacacSDimitry Andric /// static const __tgt_bin_desc BinDesc = { 1077a6dacacSDimitry Andric /// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/ 1087a6dacacSDimitry Andric /// Images, /*DeviceImages*/ 1097a6dacacSDimitry Andric /// __start_omp_offloading_entries, /*HostEntriesBegin*/ 1107a6dacacSDimitry Andric /// __stop_omp_offloading_entries /*HostEntriesEnd*/ 1117a6dacacSDimitry Andric /// }; 1127a6dacacSDimitry Andric /// 1137a6dacacSDimitry Andric /// Global variable that represents BinDesc is returned. 
1147a6dacacSDimitry Andric GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs, 115*0fca6ea1SDimitry Andric EntryArrayTy EntryArray, StringRef Suffix, 116*0fca6ea1SDimitry Andric bool Relocatable) { 1177a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 1187a6dacacSDimitry Andric auto [EntriesB, EntriesE] = EntryArray; 1197a6dacacSDimitry Andric 1207a6dacacSDimitry Andric auto *Zero = ConstantInt::get(getSizeTTy(M), 0u); 1217a6dacacSDimitry Andric Constant *ZeroZero[] = {Zero, Zero}; 1227a6dacacSDimitry Andric 1237a6dacacSDimitry Andric // Create initializer for the images array. 1247a6dacacSDimitry Andric SmallVector<Constant *, 4u> ImagesInits; 1257a6dacacSDimitry Andric ImagesInits.reserve(Bufs.size()); 1267a6dacacSDimitry Andric for (ArrayRef<char> Buf : Bufs) { 1277a6dacacSDimitry Andric // We embed the full offloading entry so the binary utilities can parse it. 1287a6dacacSDimitry Andric auto *Data = ConstantDataArray::get(C, Buf); 1297a6dacacSDimitry Andric auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true, 1307a6dacacSDimitry Andric GlobalVariable::InternalLinkage, Data, 1317a6dacacSDimitry Andric ".omp_offloading.device_image" + Suffix); 1327a6dacacSDimitry Andric Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 133*0fca6ea1SDimitry Andric Image->setSection(Relocatable ? ".llvm.offloading.relocatable" 134*0fca6ea1SDimitry Andric : ".llvm.offloading"); 1357a6dacacSDimitry Andric Image->setAlignment(Align(object::OffloadBinary::getAlignment())); 1367a6dacacSDimitry Andric 1377a6dacacSDimitry Andric StringRef Binary(Buf.data(), Buf.size()); 1387a6dacacSDimitry Andric assert(identify_magic(Binary) == file_magic::offload_binary && 1397a6dacacSDimitry Andric "Invalid binary format"); 1407a6dacacSDimitry Andric 1417a6dacacSDimitry Andric // The device image struct contains the pointer to the beginning and end of 1427a6dacacSDimitry Andric // the image stored inside of the offload binary. 
There should only be one 1437a6dacacSDimitry Andric // of these for each buffer so we parse it out manually. 1447a6dacacSDimitry Andric const auto *Header = 1457a6dacacSDimitry Andric reinterpret_cast<const object::OffloadBinary::Header *>( 1467a6dacacSDimitry Andric Binary.bytes_begin()); 1477a6dacacSDimitry Andric const auto *Entry = reinterpret_cast<const object::OffloadBinary::Entry *>( 1487a6dacacSDimitry Andric Binary.bytes_begin() + Header->EntryOffset); 1497a6dacacSDimitry Andric 1507a6dacacSDimitry Andric auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset); 1517a6dacacSDimitry Andric auto *Size = 1527a6dacacSDimitry Andric ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize); 1537a6dacacSDimitry Andric Constant *ZeroBegin[] = {Zero, Begin}; 1547a6dacacSDimitry Andric Constant *ZeroSize[] = {Zero, Size}; 1557a6dacacSDimitry Andric 1567a6dacacSDimitry Andric auto *ImageB = 1577a6dacacSDimitry Andric ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin); 1587a6dacacSDimitry Andric auto *ImageE = 1597a6dacacSDimitry Andric ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize); 1607a6dacacSDimitry Andric 1617a6dacacSDimitry Andric ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB, 1627a6dacacSDimitry Andric ImageE, EntriesB, EntriesE)); 1637a6dacacSDimitry Andric } 1647a6dacacSDimitry Andric 1657a6dacacSDimitry Andric // Then create images array. 
1667a6dacacSDimitry Andric auto *ImagesData = ConstantArray::get( 1677a6dacacSDimitry Andric ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits); 1687a6dacacSDimitry Andric 1697a6dacacSDimitry Andric auto *Images = 1707a6dacacSDimitry Andric new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true, 1717a6dacacSDimitry Andric GlobalValue::InternalLinkage, ImagesData, 1727a6dacacSDimitry Andric ".omp_offloading.device_images" + Suffix); 1737a6dacacSDimitry Andric Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1747a6dacacSDimitry Andric 1757a6dacacSDimitry Andric auto *ImagesB = 1767a6dacacSDimitry Andric ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero); 1777a6dacacSDimitry Andric 1787a6dacacSDimitry Andric // And finally create the binary descriptor object. 1797a6dacacSDimitry Andric auto *DescInit = ConstantStruct::get( 1807a6dacacSDimitry Andric getBinDescTy(M), 1817a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB, 1827a6dacacSDimitry Andric EntriesB, EntriesE); 1837a6dacacSDimitry Andric 1847a6dacacSDimitry Andric return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true, 1857a6dacacSDimitry Andric GlobalValue::InternalLinkage, DescInit, 1867a6dacacSDimitry Andric ".omp_offloading.descriptor" + Suffix); 1877a6dacacSDimitry Andric } 1887a6dacacSDimitry Andric 189*0fca6ea1SDimitry Andric Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc, 1907a6dacacSDimitry Andric StringRef Suffix) { 1917a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 1927a6dacacSDimitry Andric auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 1937a6dacacSDimitry Andric auto *Func = 1947a6dacacSDimitry Andric Function::Create(FuncTy, GlobalValue::InternalLinkage, 1957a6dacacSDimitry Andric ".omp_offloading.descriptor_unreg" + Suffix, &M); 1967a6dacacSDimitry Andric Func->setSection(".text.startup"); 1977a6dacacSDimitry Andric 
1987a6dacacSDimitry Andric // Get __tgt_unregister_lib function declaration. 1997a6dacacSDimitry Andric auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), 2007a6dacacSDimitry Andric /*isVarArg*/ false); 2017a6dacacSDimitry Andric FunctionCallee UnRegFuncC = 2027a6dacacSDimitry Andric M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy); 2037a6dacacSDimitry Andric 2047a6dacacSDimitry Andric // Construct function body 2057a6dacacSDimitry Andric IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); 2067a6dacacSDimitry Andric Builder.CreateCall(UnRegFuncC, BinDesc); 2077a6dacacSDimitry Andric Builder.CreateRetVoid(); 2087a6dacacSDimitry Andric 209*0fca6ea1SDimitry Andric return Func; 210*0fca6ea1SDimitry Andric } 211*0fca6ea1SDimitry Andric 212*0fca6ea1SDimitry Andric void createRegisterFunction(Module &M, GlobalVariable *BinDesc, 213*0fca6ea1SDimitry Andric StringRef Suffix) { 214*0fca6ea1SDimitry Andric LLVMContext &C = M.getContext(); 215*0fca6ea1SDimitry Andric auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); 216*0fca6ea1SDimitry Andric auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, 217*0fca6ea1SDimitry Andric ".omp_offloading.descriptor_reg" + Suffix, &M); 218*0fca6ea1SDimitry Andric Func->setSection(".text.startup"); 219*0fca6ea1SDimitry Andric 220*0fca6ea1SDimitry Andric // Get __tgt_register_lib function declaration. 
221*0fca6ea1SDimitry Andric auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), 222*0fca6ea1SDimitry Andric /*isVarArg*/ false); 223*0fca6ea1SDimitry Andric FunctionCallee RegFuncC = 224*0fca6ea1SDimitry Andric M.getOrInsertFunction("__tgt_register_lib", RegFuncTy); 225*0fca6ea1SDimitry Andric 226*0fca6ea1SDimitry Andric auto *AtExitTy = FunctionType::get( 227*0fca6ea1SDimitry Andric Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false); 228*0fca6ea1SDimitry Andric FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy); 229*0fca6ea1SDimitry Andric 230*0fca6ea1SDimitry Andric Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix); 231*0fca6ea1SDimitry Andric 232*0fca6ea1SDimitry Andric // Construct function body 233*0fca6ea1SDimitry Andric IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); 234*0fca6ea1SDimitry Andric 235*0fca6ea1SDimitry Andric Builder.CreateCall(RegFuncC, BinDesc); 236*0fca6ea1SDimitry Andric 237*0fca6ea1SDimitry Andric // Register the destructors with 'atexit'. This is expected by the CUDA 238*0fca6ea1SDimitry Andric // runtime and ensures that we clean up before dynamic objects are destroyed. 239*0fca6ea1SDimitry Andric // This needs to be done after plugin initialization to ensure that it is 240*0fca6ea1SDimitry Andric // called before the plugin runtime is destroyed. 241*0fca6ea1SDimitry Andric Builder.CreateCall(AtExit, UnregFunc); 242*0fca6ea1SDimitry Andric Builder.CreateRetVoid(); 243*0fca6ea1SDimitry Andric 244*0fca6ea1SDimitry Andric // Add this function to constructors. 
245*0fca6ea1SDimitry Andric appendToGlobalCtors(M, Func, /*Priority=*/101); 2467a6dacacSDimitry Andric } 2477a6dacacSDimitry Andric 2487a6dacacSDimitry Andric // struct fatbin_wrapper { 2497a6dacacSDimitry Andric // int32_t magic; 2507a6dacacSDimitry Andric // int32_t version; 2517a6dacacSDimitry Andric // void *image; 2527a6dacacSDimitry Andric // void *reserved; 2537a6dacacSDimitry Andric //}; 2547a6dacacSDimitry Andric StructType *getFatbinWrapperTy(Module &M) { 2557a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 2567a6dacacSDimitry Andric StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper"); 2577a6dacacSDimitry Andric if (!FatbinTy) 2587a6dacacSDimitry Andric FatbinTy = StructType::create( 2597a6dacacSDimitry Andric "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C), 2607a6dacacSDimitry Andric PointerType::getUnqual(C), PointerType::getUnqual(C)); 2617a6dacacSDimitry Andric return FatbinTy; 2627a6dacacSDimitry Andric } 2637a6dacacSDimitry Andric 2647a6dacacSDimitry Andric /// Embed the image \p Image into the module \p M so it can be found by the 2657a6dacacSDimitry Andric /// runtime. 2667a6dacacSDimitry Andric GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP, 2677a6dacacSDimitry Andric StringRef Suffix) { 2687a6dacacSDimitry Andric LLVMContext &C = M.getContext(); 2697a6dacacSDimitry Andric llvm::Type *Int8PtrTy = PointerType::getUnqual(C); 2707a6dacacSDimitry Andric llvm::Triple Triple = llvm::Triple(M.getTargetTriple()); 2717a6dacacSDimitry Andric 2727a6dacacSDimitry Andric // Create the global string containing the fatbinary. 2737a6dacacSDimitry Andric StringRef FatbinConstantSection = 2747a6dacacSDimitry Andric IsHIP ? ".hip_fatbin" 2757a6dacacSDimitry Andric : (Triple.isMacOSX() ? 
"__NV_CUDA,__nv_fatbin" : ".nv_fatbin"); 2767a6dacacSDimitry Andric auto *Data = ConstantDataArray::get(C, Image); 2777a6dacacSDimitry Andric auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true, 2787a6dacacSDimitry Andric GlobalVariable::InternalLinkage, Data, 2797a6dacacSDimitry Andric ".fatbin_image" + Suffix); 2807a6dacacSDimitry Andric Fatbin->setSection(FatbinConstantSection); 2817a6dacacSDimitry Andric 2827a6dacacSDimitry Andric // Create the fatbinary wrapper 2837a6dacacSDimitry Andric StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment" 2847a6dacacSDimitry Andric : Triple.isMacOSX() ? "__NV_CUDA,__fatbin" 2857a6dacacSDimitry Andric : ".nvFatBinSegment"; 2867a6dacacSDimitry Andric Constant *FatbinWrapper[] = { 2877a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic), 2887a6dacacSDimitry Andric ConstantInt::get(Type::getInt32Ty(C), 1), 2897a6dacacSDimitry Andric ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy), 2907a6dacacSDimitry Andric ConstantPointerNull::get(PointerType::getUnqual(C))}; 2917a6dacacSDimitry Andric 2927a6dacacSDimitry Andric Constant *FatbinInitializer = 2937a6dacacSDimitry Andric ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper); 2947a6dacacSDimitry Andric 2957a6dacacSDimitry Andric auto *FatbinDesc = 2967a6dacacSDimitry Andric new GlobalVariable(M, getFatbinWrapperTy(M), 2977a6dacacSDimitry Andric /*isConstant*/ true, GlobalValue::InternalLinkage, 2987a6dacacSDimitry Andric FatbinInitializer, ".fatbin_wrapper" + Suffix); 2997a6dacacSDimitry Andric FatbinDesc->setSection(FatbinWrapperSection); 3007a6dacacSDimitry Andric FatbinDesc->setAlignment(Align(8)); 3017a6dacacSDimitry Andric 3027a6dacacSDimitry Andric return FatbinDesc; 3037a6dacacSDimitry Andric } 3047a6dacacSDimitry Andric 3057a6dacacSDimitry Andric /// Create the register globals function. 
/// We will iterate all of the offloading
/// entries stored at the begin / end symbols and register them according to
/// their type. This creates the following function in IR:
///
/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
///
/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
///                                    void *, void *, void *, void *, int *);
/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
///                               int64_t, int32_t, int32_t);
///
/// void __cudaRegisterTest(void **fatbinHandle) {
///   for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
///        entry != &__stop_cuda_offloading_entries; ++entry) {
///     if (!entry->size)
///       __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
///                              entry->name, -1, 0, 0, 0, 0, 0);
///     else
///       __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
///                         0, entry->size, 0, 0);
///   }
/// }
///
/// \param M          Module that receives the declarations and the function.
/// \param IsHIP      Selects the `__hip*` runtime symbol names and the
///                   `.hip.globals_reg` function name instead of the
///                   `__cuda*` / `.cuda.globals_reg` ones.
/// \param EntryArray Begin / end symbols of the offloading entry table.
/// \param Suffix     Not referenced anywhere in this function body.
/// \param EmitSurfacesAndTextures When false, the surface and texture switch
///                   targets are still created but contain only a branch to
///                   `if.end` (no registration call).
/// \returns The created internal function taking the fatbinary handle.
Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
                                        EntryArrayTy EntryArray,
                                        StringRef Suffix,
                                        bool EmitSurfacesAndTextures) {
  LLVMContext &C = M.getContext();
  auto [EntriesB, EntriesE] = EntryArray;

  // Get the __cudaRegisterFunction function declaration.
  // All three pointer types below are the same address-space-0 opaque pointer;
  // the distinct names only document the intended pointee of each parameter.
  PointerType *Int8PtrTy = PointerType::get(C, 0);
  PointerType *Int8PtrPtrTy = PointerType::get(C, 0);
  PointerType *Int32PtrTy = PointerType::get(C, 0);
  auto *RegFuncTy = FunctionType::get(
      Type::getInt32Ty(C),
      {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
       Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
      /*isVarArg*/ false);
  FunctionCallee RegFunc = M.getOrInsertFunction(
      IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);

  // Get the __cudaRegisterVar function declaration.
  auto *RegVarTy = FunctionType::get(
      Type::getVoidTy(C),
      {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
       getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)},
      /*isVarArg*/ false);
  FunctionCallee RegVar = M.getOrInsertFunction(
      IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);

  // Get the __cudaRegisterSurface function declaration.
  FunctionType *RegSurfaceTy =
      FunctionType::get(Type::getVoidTy(C),
                        {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
                         Type::getInt32Ty(C), Type::getInt32Ty(C)},
                        /*isVarArg=*/false);
  FunctionCallee RegSurface = M.getOrInsertFunction(
      IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy);

  // Get the __cudaRegisterTexture function declaration.
  FunctionType *RegTextureTy = FunctionType::get(
      Type::getVoidTy(C),
      {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
       Type::getInt32Ty(C), Type::getInt32Ty(C)},
      /*isVarArg=*/false);
  FunctionCallee RegTexture = M.getOrInsertFunction(
      IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy);

  // The function being built: void(ptr fatbinHandle), internal linkage.
  auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy,
                                         /*isVarArg*/ false);
  auto *RegGlobalsFn =
      Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
                       IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
  RegGlobalsFn->setSection(".text.startup");

  // Create the loop to register all the entries.
  // All basic blocks are created up front, in the order they are appended to
  // the function; the builder starts out positioned in "entry".
  IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
  auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn);
  auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn);
  auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn);
  auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn);
  auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn);
  auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn);
  auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn);
  auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn);
  auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn);

  // Skip the loop entirely when the begin and end symbols compare equal.
  auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
  Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
  Builder.SetInsertPoint(EntryBB);
  // Loop induction variable: pointer to the current entry. Its two incoming
  // values are added further down, once the increment has been emitted.
  auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry");
  // Load struct fields 0..4 of the current entry: addr, name, size, flags and
  // a 32-bit value named "textype".
  auto *AddrPtr =
      Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
                                {ConstantInt::get(getSizeTTy(M), 0),
                                 ConstantInt::get(Type::getInt32Ty(C), 0)});
  auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr");
  auto *NamePtr =
      Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
                                {ConstantInt::get(getSizeTTy(M), 0),
                                 ConstantInt::get(Type::getInt32Ty(C), 1)});
  auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name");
  auto *SizePtr =
      Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
                                {ConstantInt::get(getSizeTTy(M), 0),
                                 ConstantInt::get(Type::getInt32Ty(C), 2)});
  auto *Size = Builder.CreateLoad(getSizeTTy(M), SizePtr, "size");
  auto *FlagsPtr =
      Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
                                {ConstantInt::get(getSizeTTy(M), 0),
                                 ConstantInt::get(Type::getInt32Ty(C), 3)});
  auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags");
  auto *DataPtr =
      Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
                                {ConstantInt::get(getSizeTTy(M), 0),
                                 ConstantInt::get(Type::getInt32Ty(C), 4)});
  auto *Data = Builder.CreateLoad(Type::getInt32Ty(C), DataPtr, "textype");
  // The low three bits of the flags select the entry kind used by the switch.
  auto *Kind = Builder.CreateAnd(
      Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type");

  // Extract the flags stored in the bit-field and convert them to C booleans.
  // NOTE(review): the shift amounts 3 / 4 / 5 presumably match the bit
  // positions of the corresponding OffloadGlobal* masks - confirm against
  // their definition.
  auto *ExternBit = Builder.CreateAnd(
      Flags, ConstantInt::get(Type::getInt32Ty(C),
                              llvm::offloading::OffloadGlobalExtern));
  auto *Extern = Builder.CreateLShr(
      ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern");
  auto *ConstantBit = Builder.CreateAnd(
      Flags, ConstantInt::get(Type::getInt32Ty(C),
                              llvm::offloading::OffloadGlobalConstant));
  auto *Const = Builder.CreateLShr(
      ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant");
  auto *NormalizedBit = Builder.CreateAnd(
      Flags, ConstantInt::get(Type::getInt32Ty(C),
                              llvm::offloading::OffloadGlobalNormalized));
  auto *Normalized = Builder.CreateLShr(
      NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized");
  // A size of zero routes the entry to the function-registration path
  // (if.then); anything else goes to the kind switch (if.else).
  auto *FnCond =
      Builder.CreateICmpEQ(Size, ConstantInt::getNullValue(getSizeTTy(M)));
  Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);

  // Create kernel registration code.
  Builder.SetInsertPoint(IfThenBB);
  Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
                               ConstantInt::get(Type::getInt32Ty(C), -1),
                               ConstantPointerNull::get(Int8PtrTy),
                               ConstantPointerNull::get(Int8PtrTy),
                               ConstantPointerNull::get(Int8PtrTy),
                               ConstantPointerNull::get(Int8PtrTy),
                               ConstantPointerNull::get(Int32PtrTy)});
  Builder.CreateBr(IfEndBB);
  Builder.SetInsertPoint(IfElseBB);

  // Dispatch on the entry kind; kinds without a case fall through to if.end.
  auto *Switch = Builder.CreateSwitch(Kind, IfEndBB);
  // Create global variable registration code.
  Builder.SetInsertPoint(SwGlobalBB);
  Builder.CreateCall(RegVar,
                     {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
                      Const, ConstantInt::get(Type::getInt32Ty(C), 0)});
  Builder.CreateBr(IfEndBB);
  Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry),
                  SwGlobalBB);

  // Create managed variable registration code.
  // No runtime call is emitted for managed entries here; the block only
  // branches on to if.end.
  Builder.SetInsertPoint(SwManagedBB);
  Builder.CreateBr(IfEndBB);
  Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
                  SwManagedBB);
  // Create surface variable registration code.
  Builder.SetInsertPoint(SwSurfaceBB);
  if (EmitSurfacesAndTextures)
    Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
                                    Data, Extern});
  Builder.CreateBr(IfEndBB);
  Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry),
                  SwSurfaceBB);

  // Create texture variable registration code.
  Builder.SetInsertPoint(SwTextureBB);
  if (EmitSurfacesAndTextures)
    Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
                                    Data, Normalized, Extern});
  Builder.CreateBr(IfEndBB);
  Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry),
                  SwTextureBB);

  // Loop latch: advance to the next entry and leave the loop once the
  // advanced pointer equals the end symbol.
  Builder.SetInsertPoint(IfEndBB);
  auto *NewEntry = Builder.CreateInBoundsGEP(
      offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1));
  auto *Cmp = Builder.CreateICmpEQ(
      NewEntry,
      ConstantExpr::getInBoundsGetElementPtr(
          ArrayType::get(offloading::getEntryTy(M), 0), EntriesE,
          ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
                                ConstantInt::get(getSizeTTy(M), 0)})));
  // Wire up the phi: the begin symbol when coming from the function's entry
  // block, the advanced pointer when coming from if.end.
  Entry->addIncoming(
      ConstantExpr::getInBoundsGetElementPtr(
          ArrayType::get(offloading::getEntryTy(M), 0), EntriesB,
          ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
                                ConstantInt::get(getSizeTTy(M), 0)})),
      &RegGlobalsFn->getEntryBlock());
  Entry->addIncoming(NewEntry, IfEndBB);
  Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
  Builder.SetInsertPoint(ExitBB);
  Builder.CreateRetVoid();

  return RegGlobalsFn;
}

// Create the constructor and destructor to register the fatbinary with the CUDA
// runtime.
void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
                                  bool IsHIP, EntryArrayTy EntryArray,
                                  StringRef Suffix,
                                  bool EmitSurfacesAndTextures) {
  LLVMContext &C = M.getContext();
  auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
  auto *CtorFunc = Function::Create(
      CtorFuncTy, GlobalValue::InternalLinkage,
      (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
  CtorFunc->setSection(".text.startup");

  auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
  auto *DtorFunc = Function::Create(
      DtorFuncTy, GlobalValue::InternalLinkage,
      (IsHIP ?
".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M); 5307a6dacacSDimitry Andric DtorFunc->setSection(".text.startup"); 5317a6dacacSDimitry Andric 5327a6dacacSDimitry Andric auto *PtrTy = PointerType::getUnqual(C); 5337a6dacacSDimitry Andric 5347a6dacacSDimitry Andric // Get the __cudaRegisterFatBinary function declaration. 5357a6dacacSDimitry Andric auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false); 5367a6dacacSDimitry Andric FunctionCallee RegFatbin = M.getOrInsertFunction( 5377a6dacacSDimitry Andric IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy); 5387a6dacacSDimitry Andric // Get the __cudaRegisterFatBinaryEnd function declaration. 5397a6dacacSDimitry Andric auto *RegFatEndTy = 5407a6dacacSDimitry Andric FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false); 5417a6dacacSDimitry Andric FunctionCallee RegFatbinEnd = 5427a6dacacSDimitry Andric M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy); 5437a6dacacSDimitry Andric // Get the __cudaUnregisterFatBinary function declaration. 5447a6dacacSDimitry Andric auto *UnregFatTy = 5457a6dacacSDimitry Andric FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false); 5467a6dacacSDimitry Andric FunctionCallee UnregFatbin = M.getOrInsertFunction( 5477a6dacacSDimitry Andric IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary", 5487a6dacacSDimitry Andric UnregFatTy); 5497a6dacacSDimitry Andric 5507a6dacacSDimitry Andric auto *AtExitTy = 5517a6dacacSDimitry Andric FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false); 5527a6dacacSDimitry Andric FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy); 5537a6dacacSDimitry Andric 5547a6dacacSDimitry Andric auto *BinaryHandleGlobal = new llvm::GlobalVariable( 5557a6dacacSDimitry Andric M, PtrTy, false, llvm::GlobalValue::InternalLinkage, 5567a6dacacSDimitry Andric llvm::ConstantPointerNull::get(PtrTy), 5577a6dacacSDimitry Andric (IsHIP ? 
".hip.binary_handle" : ".cuda.binary_handle") + Suffix); 5587a6dacacSDimitry Andric 5597a6dacacSDimitry Andric // Create the constructor to register this image with the runtime. 5607a6dacacSDimitry Andric IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc)); 5617a6dacacSDimitry Andric CallInst *Handle = CtorBuilder.CreateCall( 5627a6dacacSDimitry Andric RegFatbin, 5637a6dacacSDimitry Andric ConstantExpr::getPointerBitCastOrAddrSpaceCast(FatbinDesc, PtrTy)); 5647a6dacacSDimitry Andric CtorBuilder.CreateAlignedStore( 5657a6dacacSDimitry Andric Handle, BinaryHandleGlobal, 5667a6dacacSDimitry Andric Align(M.getDataLayout().getPointerTypeSize(PtrTy))); 5677a6dacacSDimitry Andric CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray, 5687a6dacacSDimitry Andric Suffix, 5697a6dacacSDimitry Andric EmitSurfacesAndTextures), 5707a6dacacSDimitry Andric Handle); 5717a6dacacSDimitry Andric if (!IsHIP) 5727a6dacacSDimitry Andric CtorBuilder.CreateCall(RegFatbinEnd, Handle); 5737a6dacacSDimitry Andric CtorBuilder.CreateCall(AtExit, DtorFunc); 5747a6dacacSDimitry Andric CtorBuilder.CreateRetVoid(); 5757a6dacacSDimitry Andric 5767a6dacacSDimitry Andric // Create the destructor to unregister the image with the runtime. We cannot 5777a6dacacSDimitry Andric // use a standard global destructor after CUDA 9.2 so this must be called by 5787a6dacacSDimitry Andric // `atexit()` intead. 5797a6dacacSDimitry Andric IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc)); 5807a6dacacSDimitry Andric LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad( 5817a6dacacSDimitry Andric PtrTy, BinaryHandleGlobal, 5827a6dacacSDimitry Andric Align(M.getDataLayout().getPointerTypeSize(PtrTy))); 5837a6dacacSDimitry Andric DtorBuilder.CreateCall(UnregFatbin, BinaryHandle); 5847a6dacacSDimitry Andric DtorBuilder.CreateRetVoid(); 5857a6dacacSDimitry Andric 5867a6dacacSDimitry Andric // Add this function to constructors. 
587*0fca6ea1SDimitry Andric appendToGlobalCtors(M, CtorFunc, /*Priority=*/101); 5887a6dacacSDimitry Andric } 5897a6dacacSDimitry Andric } // namespace 5907a6dacacSDimitry Andric 5917a6dacacSDimitry Andric Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images, 5927a6dacacSDimitry Andric EntryArrayTy EntryArray, 593*0fca6ea1SDimitry Andric llvm::StringRef Suffix, bool Relocatable) { 594*0fca6ea1SDimitry Andric GlobalVariable *Desc = 595*0fca6ea1SDimitry Andric createBinDesc(M, Images, EntryArray, Suffix, Relocatable); 5967a6dacacSDimitry Andric if (!Desc) 5977a6dacacSDimitry Andric return createStringError(inconvertibleErrorCode(), 5987a6dacacSDimitry Andric "No binary descriptors created."); 5997a6dacacSDimitry Andric createRegisterFunction(M, Desc, Suffix); 6007a6dacacSDimitry Andric return Error::success(); 6017a6dacacSDimitry Andric } 6027a6dacacSDimitry Andric 6037a6dacacSDimitry Andric Error offloading::wrapCudaBinary(Module &M, ArrayRef<char> Image, 6047a6dacacSDimitry Andric EntryArrayTy EntryArray, 6057a6dacacSDimitry Andric llvm::StringRef Suffix, 6067a6dacacSDimitry Andric bool EmitSurfacesAndTextures) { 6077a6dacacSDimitry Andric GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix); 6087a6dacacSDimitry Andric if (!Desc) 6097a6dacacSDimitry Andric return createStringError(inconvertibleErrorCode(), 6107a6dacacSDimitry Andric "No fatbin section created."); 6117a6dacacSDimitry Andric 6127a6dacacSDimitry Andric createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix, 6137a6dacacSDimitry Andric EmitSurfacesAndTextures); 6147a6dacacSDimitry Andric return Error::success(); 6157a6dacacSDimitry Andric } 6167a6dacacSDimitry Andric 6177a6dacacSDimitry Andric Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image, 6187a6dacacSDimitry Andric EntryArrayTy EntryArray, llvm::StringRef Suffix, 6197a6dacacSDimitry Andric bool EmitSurfacesAndTextures) { 6207a6dacacSDimitry Andric GlobalVariable *Desc 
= createFatbinDesc(M, Image, /*IsHip=*/true, Suffix); 6217a6dacacSDimitry Andric if (!Desc) 6227a6dacacSDimitry Andric return createStringError(inconvertibleErrorCode(), 6237a6dacacSDimitry Andric "No fatbin section created."); 6247a6dacacSDimitry Andric 6257a6dacacSDimitry Andric createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix, 6267a6dacacSDimitry Andric EmitSurfacesAndTextures); 6277a6dacacSDimitry Andric return Error::success(); 6287a6dacacSDimitry Andric } 629