xref: /llvm-project/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp (revision 13dcc95dcd4999ff99f2de89d881f1aed5b21709)
19fa9d9a7SFabian Mora //===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
29fa9d9a7SFabian Mora //
39fa9d9a7SFabian Mora // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49fa9d9a7SFabian Mora // See https://llvm.org/LICENSE.txt for license information.
59fa9d9a7SFabian Mora // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69fa9d9a7SFabian Mora //
79fa9d9a7SFabian Mora //===----------------------------------------------------------------------===//
89fa9d9a7SFabian Mora 
99fa9d9a7SFabian Mora #include "llvm/Frontend/Offloading/OffloadWrapper.h"
109fa9d9a7SFabian Mora #include "llvm/ADT/ArrayRef.h"
119fa9d9a7SFabian Mora #include "llvm/BinaryFormat/Magic.h"
129fa9d9a7SFabian Mora #include "llvm/Frontend/Offloading/Utility.h"
139fa9d9a7SFabian Mora #include "llvm/IR/Constants.h"
149fa9d9a7SFabian Mora #include "llvm/IR/GlobalVariable.h"
159fa9d9a7SFabian Mora #include "llvm/IR/IRBuilder.h"
169fa9d9a7SFabian Mora #include "llvm/IR/LLVMContext.h"
179fa9d9a7SFabian Mora #include "llvm/IR/Module.h"
189fa9d9a7SFabian Mora #include "llvm/Object/OffloadBinary.h"
199fa9d9a7SFabian Mora #include "llvm/Support/Error.h"
209fa9d9a7SFabian Mora #include "llvm/TargetParser/Triple.h"
219fa9d9a7SFabian Mora #include "llvm/Transforms/Utils/ModuleUtils.h"
229fa9d9a7SFabian Mora 
239fa9d9a7SFabian Mora using namespace llvm;
249fa9d9a7SFabian Mora using namespace llvm::offloading;
259fa9d9a7SFabian Mora 
269fa9d9a7SFabian Mora namespace {
/// Magic number identifying a CUDA fatbinary wrapper. It is stored in the
/// `magic` field of the `fatbin_wrapper` struct built by createFatbinDesc.
constexpr unsigned CudaFatMagic = 0x466243B1U;
/// HIP counterpart of CudaFatMagic; the bytes 0x48 0x49 0x50 0x46 spell "HIPF"
/// in ASCII.
constexpr unsigned HIPFatMagic = 0x48495046U;
309fa9d9a7SFabian Mora 
319fa9d9a7SFabian Mora IntegerType *getSizeTTy(Module &M) {
329fa9d9a7SFabian Mora   return M.getDataLayout().getIntPtrType(M.getContext());
339fa9d9a7SFabian Mora }
349fa9d9a7SFabian Mora 
359fa9d9a7SFabian Mora // struct __tgt_device_image {
369fa9d9a7SFabian Mora //   void *ImageStart;
379fa9d9a7SFabian Mora //   void *ImageEnd;
389fa9d9a7SFabian Mora //   __tgt_offload_entry *EntriesBegin;
399fa9d9a7SFabian Mora //   __tgt_offload_entry *EntriesEnd;
409fa9d9a7SFabian Mora // };
419fa9d9a7SFabian Mora StructType *getDeviceImageTy(Module &M) {
429fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
439fa9d9a7SFabian Mora   StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image");
449fa9d9a7SFabian Mora   if (!ImageTy)
459fa9d9a7SFabian Mora     ImageTy =
469fa9d9a7SFabian Mora         StructType::create("__tgt_device_image", PointerType::getUnqual(C),
479fa9d9a7SFabian Mora                            PointerType::getUnqual(C), PointerType::getUnqual(C),
489fa9d9a7SFabian Mora                            PointerType::getUnqual(C));
499fa9d9a7SFabian Mora   return ImageTy;
509fa9d9a7SFabian Mora }
519fa9d9a7SFabian Mora 
529fa9d9a7SFabian Mora PointerType *getDeviceImagePtrTy(Module &M) {
53d7c14c8fSMats Jun Larsen   return PointerType::getUnqual(M.getContext());
549fa9d9a7SFabian Mora }
559fa9d9a7SFabian Mora 
569fa9d9a7SFabian Mora // struct __tgt_bin_desc {
579fa9d9a7SFabian Mora //   int32_t NumDeviceImages;
589fa9d9a7SFabian Mora //   __tgt_device_image *DeviceImages;
599fa9d9a7SFabian Mora //   __tgt_offload_entry *HostEntriesBegin;
609fa9d9a7SFabian Mora //   __tgt_offload_entry *HostEntriesEnd;
619fa9d9a7SFabian Mora // };
629fa9d9a7SFabian Mora StructType *getBinDescTy(Module &M) {
639fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
649fa9d9a7SFabian Mora   StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc");
659fa9d9a7SFabian Mora   if (!DescTy)
669fa9d9a7SFabian Mora     DescTy = StructType::create(
679fa9d9a7SFabian Mora         "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M),
689fa9d9a7SFabian Mora         PointerType::getUnqual(C), PointerType::getUnqual(C));
699fa9d9a7SFabian Mora   return DescTy;
709fa9d9a7SFabian Mora }
719fa9d9a7SFabian Mora 
729fa9d9a7SFabian Mora PointerType *getBinDescPtrTy(Module &M) {
73d7c14c8fSMats Jun Larsen   return PointerType::getUnqual(M.getContext());
749fa9d9a7SFabian Mora }
759fa9d9a7SFabian Mora 
769fa9d9a7SFabian Mora /// Creates binary descriptor for the given device images. Binary descriptor
779fa9d9a7SFabian Mora /// is an object that is passed to the offloading runtime at program startup
789fa9d9a7SFabian Mora /// and it describes all device images available in the executable or shared
799fa9d9a7SFabian Mora /// library. It is defined as follows
809fa9d9a7SFabian Mora ///
819fa9d9a7SFabian Mora /// __attribute__((visibility("hidden")))
829fa9d9a7SFabian Mora /// extern __tgt_offload_entry *__start_omp_offloading_entries;
839fa9d9a7SFabian Mora /// __attribute__((visibility("hidden")))
849fa9d9a7SFabian Mora /// extern __tgt_offload_entry *__stop_omp_offloading_entries;
859fa9d9a7SFabian Mora ///
869fa9d9a7SFabian Mora /// static const char Image0[] = { <Bufs.front() contents> };
879fa9d9a7SFabian Mora ///  ...
889fa9d9a7SFabian Mora /// static const char ImageN[] = { <Bufs.back() contents> };
899fa9d9a7SFabian Mora ///
909fa9d9a7SFabian Mora /// static const __tgt_device_image Images[] = {
919fa9d9a7SFabian Mora ///   {
929fa9d9a7SFabian Mora ///     Image0,                            /*ImageStart*/
939fa9d9a7SFabian Mora ///     Image0 + sizeof(Image0),           /*ImageEnd*/
949fa9d9a7SFabian Mora ///     __start_omp_offloading_entries,    /*EntriesBegin*/
959fa9d9a7SFabian Mora ///     __stop_omp_offloading_entries      /*EntriesEnd*/
969fa9d9a7SFabian Mora ///   },
979fa9d9a7SFabian Mora ///   ...
989fa9d9a7SFabian Mora ///   {
999fa9d9a7SFabian Mora ///     ImageN,                            /*ImageStart*/
1009fa9d9a7SFabian Mora ///     ImageN + sizeof(ImageN),           /*ImageEnd*/
1019fa9d9a7SFabian Mora ///     __start_omp_offloading_entries,    /*EntriesBegin*/
1029fa9d9a7SFabian Mora ///     __stop_omp_offloading_entries      /*EntriesEnd*/
1039fa9d9a7SFabian Mora ///   }
1049fa9d9a7SFabian Mora /// };
1059fa9d9a7SFabian Mora ///
1069fa9d9a7SFabian Mora /// static const __tgt_bin_desc BinDesc = {
1079fa9d9a7SFabian Mora ///   sizeof(Images) / sizeof(Images[0]),  /*NumDeviceImages*/
1089fa9d9a7SFabian Mora ///   Images,                              /*DeviceImages*/
1099fa9d9a7SFabian Mora ///   __start_omp_offloading_entries,      /*HostEntriesBegin*/
1109fa9d9a7SFabian Mora ///   __stop_omp_offloading_entries        /*HostEntriesEnd*/
1119fa9d9a7SFabian Mora /// };
1129fa9d9a7SFabian Mora ///
1139fa9d9a7SFabian Mora /// Global variable that represents BinDesc is returned.
1149fa9d9a7SFabian Mora GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
1155c840542SJoseph Huber                               EntryArrayTy EntryArray, StringRef Suffix,
1165c840542SJoseph Huber                               bool Relocatable) {
1179fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
1189fa9d9a7SFabian Mora   auto [EntriesB, EntriesE] = EntryArray;
1199fa9d9a7SFabian Mora 
1209fa9d9a7SFabian Mora   auto *Zero = ConstantInt::get(getSizeTTy(M), 0u);
1219fa9d9a7SFabian Mora   Constant *ZeroZero[] = {Zero, Zero};
1229fa9d9a7SFabian Mora 
1239fa9d9a7SFabian Mora   // Create initializer for the images array.
1249fa9d9a7SFabian Mora   SmallVector<Constant *, 4u> ImagesInits;
1259fa9d9a7SFabian Mora   ImagesInits.reserve(Bufs.size());
1269fa9d9a7SFabian Mora   for (ArrayRef<char> Buf : Bufs) {
1279fa9d9a7SFabian Mora     // We embed the full offloading entry so the binary utilities can parse it.
1289fa9d9a7SFabian Mora     auto *Data = ConstantDataArray::get(C, Buf);
1299fa9d9a7SFabian Mora     auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true,
1309fa9d9a7SFabian Mora                                      GlobalVariable::InternalLinkage, Data,
1319fa9d9a7SFabian Mora                                      ".omp_offloading.device_image" + Suffix);
1329fa9d9a7SFabian Mora     Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1335c840542SJoseph Huber     Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
1345c840542SJoseph Huber                                   : ".llvm.offloading");
1359fa9d9a7SFabian Mora     Image->setAlignment(Align(object::OffloadBinary::getAlignment()));
1369fa9d9a7SFabian Mora 
1379fa9d9a7SFabian Mora     StringRef Binary(Buf.data(), Buf.size());
1389fa9d9a7SFabian Mora     assert(identify_magic(Binary) == file_magic::offload_binary &&
1399fa9d9a7SFabian Mora            "Invalid binary format");
1409fa9d9a7SFabian Mora 
1419fa9d9a7SFabian Mora     // The device image struct contains the pointer to the beginning and end of
1429fa9d9a7SFabian Mora     // the image stored inside of the offload binary. There should only be one
1439fa9d9a7SFabian Mora     // of these for each buffer so we parse it out manually.
1449fa9d9a7SFabian Mora     const auto *Header =
1459fa9d9a7SFabian Mora         reinterpret_cast<const object::OffloadBinary::Header *>(
1469fa9d9a7SFabian Mora             Binary.bytes_begin());
1479fa9d9a7SFabian Mora     const auto *Entry = reinterpret_cast<const object::OffloadBinary::Entry *>(
1489fa9d9a7SFabian Mora         Binary.bytes_begin() + Header->EntryOffset);
1499fa9d9a7SFabian Mora 
1509fa9d9a7SFabian Mora     auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset);
1519fa9d9a7SFabian Mora     auto *Size =
1529fa9d9a7SFabian Mora         ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize);
1539fa9d9a7SFabian Mora     Constant *ZeroBegin[] = {Zero, Begin};
1549fa9d9a7SFabian Mora     Constant *ZeroSize[] = {Zero, Size};
1559fa9d9a7SFabian Mora 
1569fa9d9a7SFabian Mora     auto *ImageB =
1579fa9d9a7SFabian Mora         ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin);
1589fa9d9a7SFabian Mora     auto *ImageE =
1599fa9d9a7SFabian Mora         ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize);
1609fa9d9a7SFabian Mora 
1619fa9d9a7SFabian Mora     ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
1629fa9d9a7SFabian Mora                                               ImageE, EntriesB, EntriesE));
1639fa9d9a7SFabian Mora   }
1649fa9d9a7SFabian Mora 
1659fa9d9a7SFabian Mora   // Then create images array.
1669fa9d9a7SFabian Mora   auto *ImagesData = ConstantArray::get(
1679fa9d9a7SFabian Mora       ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits);
1689fa9d9a7SFabian Mora 
1699fa9d9a7SFabian Mora   auto *Images =
1709fa9d9a7SFabian Mora       new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
1719fa9d9a7SFabian Mora                          GlobalValue::InternalLinkage, ImagesData,
1729fa9d9a7SFabian Mora                          ".omp_offloading.device_images" + Suffix);
1739fa9d9a7SFabian Mora   Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1749fa9d9a7SFabian Mora 
1759fa9d9a7SFabian Mora   auto *ImagesB =
1769fa9d9a7SFabian Mora       ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero);
1779fa9d9a7SFabian Mora 
1789fa9d9a7SFabian Mora   // And finally create the binary descriptor object.
1799fa9d9a7SFabian Mora   auto *DescInit = ConstantStruct::get(
1809fa9d9a7SFabian Mora       getBinDescTy(M),
1819fa9d9a7SFabian Mora       ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
1829fa9d9a7SFabian Mora       EntriesB, EntriesE);
1839fa9d9a7SFabian Mora 
184*13dcc95dSJoseph Huber   return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true,
1859fa9d9a7SFabian Mora                             GlobalValue::InternalLinkage, DescInit,
1869fa9d9a7SFabian Mora                             ".omp_offloading.descriptor" + Suffix);
1879fa9d9a7SFabian Mora }
1889fa9d9a7SFabian Mora 
189421085fdSJoseph Huber Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
1909fa9d9a7SFabian Mora                                    StringRef Suffix) {
1919fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
1929fa9d9a7SFabian Mora   auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
1939fa9d9a7SFabian Mora   auto *Func =
1949fa9d9a7SFabian Mora       Function::Create(FuncTy, GlobalValue::InternalLinkage,
1959fa9d9a7SFabian Mora                        ".omp_offloading.descriptor_unreg" + Suffix, &M);
1969fa9d9a7SFabian Mora   Func->setSection(".text.startup");
1979fa9d9a7SFabian Mora 
1989fa9d9a7SFabian Mora   // Get __tgt_unregister_lib function declaration.
1999fa9d9a7SFabian Mora   auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
2009fa9d9a7SFabian Mora                                         /*isVarArg*/ false);
2019fa9d9a7SFabian Mora   FunctionCallee UnRegFuncC =
2029fa9d9a7SFabian Mora       M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
2039fa9d9a7SFabian Mora 
2049fa9d9a7SFabian Mora   // Construct function body
2059fa9d9a7SFabian Mora   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
2069fa9d9a7SFabian Mora   Builder.CreateCall(UnRegFuncC, BinDesc);
2079fa9d9a7SFabian Mora   Builder.CreateRetVoid();
2089fa9d9a7SFabian Mora 
209421085fdSJoseph Huber   return Func;
210421085fdSJoseph Huber }
211421085fdSJoseph Huber 
212421085fdSJoseph Huber void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
213421085fdSJoseph Huber                             StringRef Suffix) {
214421085fdSJoseph Huber   LLVMContext &C = M.getContext();
215421085fdSJoseph Huber   auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
216421085fdSJoseph Huber   auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
217421085fdSJoseph Huber                                 ".omp_offloading.descriptor_reg" + Suffix, &M);
218421085fdSJoseph Huber   Func->setSection(".text.startup");
219421085fdSJoseph Huber 
220421085fdSJoseph Huber   // Get __tgt_register_lib function declaration.
221421085fdSJoseph Huber   auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
222421085fdSJoseph Huber                                       /*isVarArg*/ false);
223421085fdSJoseph Huber   FunctionCallee RegFuncC =
224421085fdSJoseph Huber       M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
225421085fdSJoseph Huber 
226421085fdSJoseph Huber   auto *AtExitTy = FunctionType::get(
227421085fdSJoseph Huber       Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false);
228421085fdSJoseph Huber   FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
229421085fdSJoseph Huber 
230421085fdSJoseph Huber   Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
231421085fdSJoseph Huber 
232421085fdSJoseph Huber   // Construct function body
233421085fdSJoseph Huber   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
234421085fdSJoseph Huber 
235fa9e90f5SJoseph Huber   Builder.CreateCall(RegFuncC, BinDesc);
236fa9e90f5SJoseph Huber 
237421085fdSJoseph Huber   // Register the destructors with 'atexit'. This is expected by the CUDA
238421085fdSJoseph Huber   // runtime and ensures that we clean up before dynamic objects are destroyed.
239fa9e90f5SJoseph Huber   // This needs to be done after plugin initialization to ensure that it is
240fa9e90f5SJoseph Huber   // called before the plugin runtime is destroyed.
241421085fdSJoseph Huber   Builder.CreateCall(AtExit, UnregFunc);
242421085fdSJoseph Huber   Builder.CreateRetVoid();
243421085fdSJoseph Huber 
244421085fdSJoseph Huber   // Add this function to constructors.
245421085fdSJoseph Huber   appendToGlobalCtors(M, Func, /*Priority=*/101);
2469fa9d9a7SFabian Mora }
2479fa9d9a7SFabian Mora 
2489fa9d9a7SFabian Mora // struct fatbin_wrapper {
2499fa9d9a7SFabian Mora //  int32_t magic;
2509fa9d9a7SFabian Mora //  int32_t version;
2519fa9d9a7SFabian Mora //  void *image;
2529fa9d9a7SFabian Mora //  void *reserved;
2539fa9d9a7SFabian Mora //};
2549fa9d9a7SFabian Mora StructType *getFatbinWrapperTy(Module &M) {
2559fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
2569fa9d9a7SFabian Mora   StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper");
2579fa9d9a7SFabian Mora   if (!FatbinTy)
2589fa9d9a7SFabian Mora     FatbinTy = StructType::create(
2599fa9d9a7SFabian Mora         "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C),
2609fa9d9a7SFabian Mora         PointerType::getUnqual(C), PointerType::getUnqual(C));
2619fa9d9a7SFabian Mora   return FatbinTy;
2629fa9d9a7SFabian Mora }
2639fa9d9a7SFabian Mora 
2649fa9d9a7SFabian Mora /// Embed the image \p Image into the module \p M so it can be found by the
2659fa9d9a7SFabian Mora /// runtime.
2669fa9d9a7SFabian Mora GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
2679fa9d9a7SFabian Mora                                  StringRef Suffix) {
2689fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
2699fa9d9a7SFabian Mora   llvm::Type *Int8PtrTy = PointerType::getUnqual(C);
2709fa9d9a7SFabian Mora   llvm::Triple Triple = llvm::Triple(M.getTargetTriple());
2719fa9d9a7SFabian Mora 
2729fa9d9a7SFabian Mora   // Create the global string containing the fatbinary.
2739fa9d9a7SFabian Mora   StringRef FatbinConstantSection =
2749fa9d9a7SFabian Mora       IsHIP ? ".hip_fatbin"
2759fa9d9a7SFabian Mora             : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
2769fa9d9a7SFabian Mora   auto *Data = ConstantDataArray::get(C, Image);
2779fa9d9a7SFabian Mora   auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
2789fa9d9a7SFabian Mora                                     GlobalVariable::InternalLinkage, Data,
2799fa9d9a7SFabian Mora                                     ".fatbin_image" + Suffix);
2809fa9d9a7SFabian Mora   Fatbin->setSection(FatbinConstantSection);
2819fa9d9a7SFabian Mora 
2829fa9d9a7SFabian Mora   // Create the fatbinary wrapper
2839fa9d9a7SFabian Mora   StringRef FatbinWrapperSection = IsHIP               ? ".hipFatBinSegment"
2849fa9d9a7SFabian Mora                                    : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
2859fa9d9a7SFabian Mora                                                        : ".nvFatBinSegment";
2869fa9d9a7SFabian Mora   Constant *FatbinWrapper[] = {
2879fa9d9a7SFabian Mora       ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
2889fa9d9a7SFabian Mora       ConstantInt::get(Type::getInt32Ty(C), 1),
2899fa9d9a7SFabian Mora       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy),
2909fa9d9a7SFabian Mora       ConstantPointerNull::get(PointerType::getUnqual(C))};
2919fa9d9a7SFabian Mora 
2929fa9d9a7SFabian Mora   Constant *FatbinInitializer =
2939fa9d9a7SFabian Mora       ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper);
2949fa9d9a7SFabian Mora 
2959fa9d9a7SFabian Mora   auto *FatbinDesc =
2969fa9d9a7SFabian Mora       new GlobalVariable(M, getFatbinWrapperTy(M),
2979fa9d9a7SFabian Mora                          /*isConstant*/ true, GlobalValue::InternalLinkage,
2989fa9d9a7SFabian Mora                          FatbinInitializer, ".fatbin_wrapper" + Suffix);
2999fa9d9a7SFabian Mora   FatbinDesc->setSection(FatbinWrapperSection);
3009fa9d9a7SFabian Mora   FatbinDesc->setAlignment(Align(8));
3019fa9d9a7SFabian Mora 
3029fa9d9a7SFabian Mora   return FatbinDesc;
3039fa9d9a7SFabian Mora }
3049fa9d9a7SFabian Mora 
3059fa9d9a7SFabian Mora /// Create the register globals function. We will iterate all of the offloading
3069fa9d9a7SFabian Mora /// entries stored at the begin / end symbols and register them according to
3079fa9d9a7SFabian Mora /// their type. This creates the following function in IR:
3089fa9d9a7SFabian Mora ///
3099fa9d9a7SFabian Mora /// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
3109fa9d9a7SFabian Mora /// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
3119fa9d9a7SFabian Mora ///
3129fa9d9a7SFabian Mora /// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
3139fa9d9a7SFabian Mora ///                                    void *, void *, void *, void *, int *);
3149fa9d9a7SFabian Mora /// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
3159fa9d9a7SFabian Mora ///                               int64_t, int32_t, int32_t);
3169fa9d9a7SFabian Mora ///
3179fa9d9a7SFabian Mora /// void __cudaRegisterTest(void **fatbinHandle) {
3189fa9d9a7SFabian Mora ///   for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
3199fa9d9a7SFabian Mora ///        entry != &__stop_cuda_offloading_entries; ++entry) {
320*13dcc95dSJoseph Huber ///     if (entry->Kind != OFK_CUDA)
///       continue;
322*13dcc95dSJoseph Huber ///
323*13dcc95dSJoseph Huber ///     if (!entry->Size)
3249fa9d9a7SFabian Mora ///       __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
3259fa9d9a7SFabian Mora ///                              entry->name, -1, 0, 0, 0, 0, 0);
3269fa9d9a7SFabian Mora ///     else
3279fa9d9a7SFabian Mora ///       __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
3289fa9d9a7SFabian Mora ///                         0, entry->size, 0, 0);
3299fa9d9a7SFabian Mora ///   }
3309fa9d9a7SFabian Mora /// }
3319fa9d9a7SFabian Mora Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
3329fa9d9a7SFabian Mora                                         EntryArrayTy EntryArray,
3339fa9d9a7SFabian Mora                                         StringRef Suffix,
3349fa9d9a7SFabian Mora                                         bool EmitSurfacesAndTextures) {
3359fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
3369fa9d9a7SFabian Mora   auto [EntriesB, EntriesE] = EntryArray;
3379fa9d9a7SFabian Mora 
3389fa9d9a7SFabian Mora   // Get the __cudaRegisterFunction function declaration.
3399fa9d9a7SFabian Mora   PointerType *Int8PtrTy = PointerType::get(C, 0);
3409fa9d9a7SFabian Mora   PointerType *Int8PtrPtrTy = PointerType::get(C, 0);
3419fa9d9a7SFabian Mora   PointerType *Int32PtrTy = PointerType::get(C, 0);
3429fa9d9a7SFabian Mora   auto *RegFuncTy = FunctionType::get(
3439fa9d9a7SFabian Mora       Type::getInt32Ty(C),
3449fa9d9a7SFabian Mora       {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
3459fa9d9a7SFabian Mora        Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
3469fa9d9a7SFabian Mora       /*isVarArg*/ false);
3479fa9d9a7SFabian Mora   FunctionCallee RegFunc = M.getOrInsertFunction(
3489fa9d9a7SFabian Mora       IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
3499fa9d9a7SFabian Mora 
3509fa9d9a7SFabian Mora   // Get the __cudaRegisterVar function declaration.
3519fa9d9a7SFabian Mora   auto *RegVarTy = FunctionType::get(
3529fa9d9a7SFabian Mora       Type::getVoidTy(C),
3539fa9d9a7SFabian Mora       {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
3549fa9d9a7SFabian Mora        getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)},
3559fa9d9a7SFabian Mora       /*isVarArg*/ false);
3569fa9d9a7SFabian Mora   FunctionCallee RegVar = M.getOrInsertFunction(
3579fa9d9a7SFabian Mora       IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
3589fa9d9a7SFabian Mora 
  // Get the __cudaRegisterManagedVar function declaration.
36070a16b90SJoseph Huber   FunctionType *RegManagedVarTy =
36170a16b90SJoseph Huber       FunctionType::get(Type::getVoidTy(C),
36270a16b90SJoseph Huber                         {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
36370a16b90SJoseph Huber                          getSizeTTy(M), Type::getInt32Ty(C)},
36470a16b90SJoseph Huber                         /*isVarArg=*/false);
36570a16b90SJoseph Huber   FunctionCallee RegManagedVar = M.getOrInsertFunction(
36670a16b90SJoseph Huber       IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
36770a16b90SJoseph Huber       RegManagedVarTy);
36870a16b90SJoseph Huber 
36970a16b90SJoseph Huber   // Get the __cudaRegisterSurface function declaration.
3709fa9d9a7SFabian Mora   FunctionType *RegSurfaceTy =
3719fa9d9a7SFabian Mora       FunctionType::get(Type::getVoidTy(C),
3729fa9d9a7SFabian Mora                         {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
3739fa9d9a7SFabian Mora                          Type::getInt32Ty(C), Type::getInt32Ty(C)},
3749fa9d9a7SFabian Mora                         /*isVarArg=*/false);
3759fa9d9a7SFabian Mora   FunctionCallee RegSurface = M.getOrInsertFunction(
3769fa9d9a7SFabian Mora       IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy);
3779fa9d9a7SFabian Mora 
3789fa9d9a7SFabian Mora   // Get the __cudaRegisterTexture function declaration.
3799fa9d9a7SFabian Mora   FunctionType *RegTextureTy = FunctionType::get(
3809fa9d9a7SFabian Mora       Type::getVoidTy(C),
3819fa9d9a7SFabian Mora       {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
3829fa9d9a7SFabian Mora        Type::getInt32Ty(C), Type::getInt32Ty(C)},
3839fa9d9a7SFabian Mora       /*isVarArg=*/false);
3849fa9d9a7SFabian Mora   FunctionCallee RegTexture = M.getOrInsertFunction(
3859fa9d9a7SFabian Mora       IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy);
3869fa9d9a7SFabian Mora 
3879fa9d9a7SFabian Mora   auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy,
3889fa9d9a7SFabian Mora                                          /*isVarArg*/ false);
3899fa9d9a7SFabian Mora   auto *RegGlobalsFn =
3909fa9d9a7SFabian Mora       Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
3919fa9d9a7SFabian Mora                        IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
3929fa9d9a7SFabian Mora   RegGlobalsFn->setSection(".text.startup");
3939fa9d9a7SFabian Mora 
3949fa9d9a7SFabian Mora   // Create the loop to register all the entries.
3959fa9d9a7SFabian Mora   IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
3969fa9d9a7SFabian Mora   auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn);
397*13dcc95dSJoseph Huber   auto *IfKindBB = BasicBlock::Create(C, "if.kind", RegGlobalsFn);
3989fa9d9a7SFabian Mora   auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn);
3999fa9d9a7SFabian Mora   auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn);
4009fa9d9a7SFabian Mora   auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn);
4019fa9d9a7SFabian Mora   auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn);
4029fa9d9a7SFabian Mora   auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn);
4039fa9d9a7SFabian Mora   auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn);
4049fa9d9a7SFabian Mora   auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn);
4059fa9d9a7SFabian Mora   auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn);
4069fa9d9a7SFabian Mora 
4079fa9d9a7SFabian Mora   auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
4089fa9d9a7SFabian Mora   Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
4099fa9d9a7SFabian Mora   Builder.SetInsertPoint(EntryBB);
4109fa9d9a7SFabian Mora   auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry");
4119fa9d9a7SFabian Mora   auto *AddrPtr =
4129fa9d9a7SFabian Mora       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
413*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
414*13dcc95dSJoseph Huber                                  ConstantInt::get(Type::getInt32Ty(C), 4)});
4159fa9d9a7SFabian Mora   auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr");
416*13dcc95dSJoseph Huber   auto *AuxAddrPtr =
417*13dcc95dSJoseph Huber       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
418*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
419*13dcc95dSJoseph Huber                                  ConstantInt::get(Type::getInt32Ty(C), 8)});
420*13dcc95dSJoseph Huber   auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr, "aux_addr");
421*13dcc95dSJoseph Huber   auto *KindPtr =
422*13dcc95dSJoseph Huber       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
423*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
424*13dcc95dSJoseph Huber                                  ConstantInt::get(Type::getInt32Ty(C), 2)});
425*13dcc95dSJoseph Huber   auto *Kind = Builder.CreateLoad(Type::getInt16Ty(C), KindPtr, "kind");
4269fa9d9a7SFabian Mora   auto *NamePtr =
4279fa9d9a7SFabian Mora       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
428*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
429*13dcc95dSJoseph Huber                                  ConstantInt::get(Type::getInt32Ty(C), 5)});
4309fa9d9a7SFabian Mora   auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name");
4319fa9d9a7SFabian Mora   auto *SizePtr =
4329fa9d9a7SFabian Mora       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
433*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
434*13dcc95dSJoseph Huber                                  ConstantInt::get(Type::getInt32Ty(C), 6)});
435*13dcc95dSJoseph Huber   auto *Size = Builder.CreateLoad(Type::getInt64Ty(C), SizePtr, "size");
4369fa9d9a7SFabian Mora   auto *FlagsPtr =
4379fa9d9a7SFabian Mora       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
438*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
4399fa9d9a7SFabian Mora                                  ConstantInt::get(Type::getInt32Ty(C), 3)});
4409fa9d9a7SFabian Mora   auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags");
4419fa9d9a7SFabian Mora   auto *DataPtr =
4429fa9d9a7SFabian Mora       Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
443*13dcc95dSJoseph Huber                                 {ConstantInt::get(Type::getInt32Ty(C), 0),
444*13dcc95dSJoseph Huber                                  ConstantInt::get(Type::getInt32Ty(C), 7)});
445*13dcc95dSJoseph Huber   auto *Data = Builder.CreateTrunc(
446*13dcc95dSJoseph Huber       Builder.CreateLoad(Type::getInt64Ty(C), DataPtr, "data"),
447*13dcc95dSJoseph Huber       Type::getInt32Ty(C));
448*13dcc95dSJoseph Huber   auto *Type = Builder.CreateAnd(
4499fa9d9a7SFabian Mora       Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type");
4509fa9d9a7SFabian Mora 
4519fa9d9a7SFabian Mora   // Extract the flags stored in the bit-field and convert them to C booleans.
4529fa9d9a7SFabian Mora   auto *ExternBit = Builder.CreateAnd(
4539fa9d9a7SFabian Mora       Flags, ConstantInt::get(Type::getInt32Ty(C),
4549fa9d9a7SFabian Mora                               llvm::offloading::OffloadGlobalExtern));
4559fa9d9a7SFabian Mora   auto *Extern = Builder.CreateLShr(
4569fa9d9a7SFabian Mora       ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern");
4579fa9d9a7SFabian Mora   auto *ConstantBit = Builder.CreateAnd(
4589fa9d9a7SFabian Mora       Flags, ConstantInt::get(Type::getInt32Ty(C),
4599fa9d9a7SFabian Mora                               llvm::offloading::OffloadGlobalConstant));
4609fa9d9a7SFabian Mora   auto *Const = Builder.CreateLShr(
4619fa9d9a7SFabian Mora       ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant");
4629fa9d9a7SFabian Mora   auto *NormalizedBit = Builder.CreateAnd(
4639fa9d9a7SFabian Mora       Flags, ConstantInt::get(Type::getInt32Ty(C),
4649fa9d9a7SFabian Mora                               llvm::offloading::OffloadGlobalNormalized));
4659fa9d9a7SFabian Mora   auto *Normalized = Builder.CreateLShr(
4669fa9d9a7SFabian Mora       NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized");
467*13dcc95dSJoseph Huber   auto *KindCond = Builder.CreateICmpEQ(
468*13dcc95dSJoseph Huber       Kind, ConstantInt::get(Type::getInt16Ty(C),
469*13dcc95dSJoseph Huber                              IsHIP ? object::OffloadKind::OFK_HIP
470*13dcc95dSJoseph Huber                                    : object::OffloadKind::OFK_Cuda));
471*13dcc95dSJoseph Huber   Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB);
472*13dcc95dSJoseph Huber   Builder.SetInsertPoint(IfKindBB);
473*13dcc95dSJoseph Huber   auto *FnCond = Builder.CreateICmpEQ(
474*13dcc95dSJoseph Huber       Size, ConstantInt::getNullValue(Type::getInt64Ty(C)));
4759fa9d9a7SFabian Mora   Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
4769fa9d9a7SFabian Mora 
4779fa9d9a7SFabian Mora   // Create kernel registration code.
4789fa9d9a7SFabian Mora   Builder.SetInsertPoint(IfThenBB);
4799fa9d9a7SFabian Mora   Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
4809fa9d9a7SFabian Mora                                ConstantInt::get(Type::getInt32Ty(C), -1),
4819fa9d9a7SFabian Mora                                ConstantPointerNull::get(Int8PtrTy),
4829fa9d9a7SFabian Mora                                ConstantPointerNull::get(Int8PtrTy),
4839fa9d9a7SFabian Mora                                ConstantPointerNull::get(Int8PtrTy),
4849fa9d9a7SFabian Mora                                ConstantPointerNull::get(Int8PtrTy),
4859fa9d9a7SFabian Mora                                ConstantPointerNull::get(Int32PtrTy)});
4869fa9d9a7SFabian Mora   Builder.CreateBr(IfEndBB);
4879fa9d9a7SFabian Mora   Builder.SetInsertPoint(IfElseBB);
4889fa9d9a7SFabian Mora 
489*13dcc95dSJoseph Huber   auto *Switch = Builder.CreateSwitch(Type, IfEndBB);
4909fa9d9a7SFabian Mora   // Create global variable registration code.
4919fa9d9a7SFabian Mora   Builder.SetInsertPoint(SwGlobalBB);
4929fa9d9a7SFabian Mora   Builder.CreateCall(RegVar,
4939fa9d9a7SFabian Mora                      {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
4949fa9d9a7SFabian Mora                       Const, ConstantInt::get(Type::getInt32Ty(C), 0)});
4959fa9d9a7SFabian Mora   Builder.CreateBr(IfEndBB);
4969fa9d9a7SFabian Mora   Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry),
4979fa9d9a7SFabian Mora                   SwGlobalBB);
4989fa9d9a7SFabian Mora 
4999fa9d9a7SFabian Mora   // Create managed variable registration code.
5009fa9d9a7SFabian Mora   Builder.SetInsertPoint(SwManagedBB);
501*13dcc95dSJoseph Huber   Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
502*13dcc95dSJoseph Huber                                      Name, Size, Data});
5039fa9d9a7SFabian Mora   Builder.CreateBr(IfEndBB);
5049fa9d9a7SFabian Mora   Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
5059fa9d9a7SFabian Mora                   SwManagedBB);
5069fa9d9a7SFabian Mora   // Create surface variable registration code.
5079fa9d9a7SFabian Mora   Builder.SetInsertPoint(SwSurfaceBB);
5089fa9d9a7SFabian Mora   if (EmitSurfacesAndTextures)
5099fa9d9a7SFabian Mora     Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
5109fa9d9a7SFabian Mora                                     Data, Extern});
5119fa9d9a7SFabian Mora   Builder.CreateBr(IfEndBB);
5129fa9d9a7SFabian Mora   Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry),
5139fa9d9a7SFabian Mora                   SwSurfaceBB);
5149fa9d9a7SFabian Mora 
5159fa9d9a7SFabian Mora   // Create texture variable registration code.
5169fa9d9a7SFabian Mora   Builder.SetInsertPoint(SwTextureBB);
5179fa9d9a7SFabian Mora   if (EmitSurfacesAndTextures)
5189fa9d9a7SFabian Mora     Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
5199fa9d9a7SFabian Mora                                     Data, Normalized, Extern});
5209fa9d9a7SFabian Mora   Builder.CreateBr(IfEndBB);
5219fa9d9a7SFabian Mora   Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry),
5229fa9d9a7SFabian Mora                   SwTextureBB);
5239fa9d9a7SFabian Mora 
5249fa9d9a7SFabian Mora   Builder.SetInsertPoint(IfEndBB);
5259fa9d9a7SFabian Mora   auto *NewEntry = Builder.CreateInBoundsGEP(
5269fa9d9a7SFabian Mora       offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1));
5279fa9d9a7SFabian Mora   auto *Cmp = Builder.CreateICmpEQ(
5289fa9d9a7SFabian Mora       NewEntry,
5299fa9d9a7SFabian Mora       ConstantExpr::getInBoundsGetElementPtr(
5309fa9d9a7SFabian Mora           ArrayType::get(offloading::getEntryTy(M), 0), EntriesE,
5319fa9d9a7SFabian Mora           ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
5329fa9d9a7SFabian Mora                                 ConstantInt::get(getSizeTTy(M), 0)})));
5339fa9d9a7SFabian Mora   Entry->addIncoming(
5349fa9d9a7SFabian Mora       ConstantExpr::getInBoundsGetElementPtr(
5359fa9d9a7SFabian Mora           ArrayType::get(offloading::getEntryTy(M), 0), EntriesB,
5369fa9d9a7SFabian Mora           ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
5379fa9d9a7SFabian Mora                                 ConstantInt::get(getSizeTTy(M), 0)})),
5389fa9d9a7SFabian Mora       &RegGlobalsFn->getEntryBlock());
5399fa9d9a7SFabian Mora   Entry->addIncoming(NewEntry, IfEndBB);
5409fa9d9a7SFabian Mora   Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
5419fa9d9a7SFabian Mora   Builder.SetInsertPoint(ExitBB);
5429fa9d9a7SFabian Mora   Builder.CreateRetVoid();
5439fa9d9a7SFabian Mora 
5449fa9d9a7SFabian Mora   return RegGlobalsFn;
5459fa9d9a7SFabian Mora }
5469fa9d9a7SFabian Mora 
5479fa9d9a7SFabian Mora // Create the constructor and destructor to register the fatbinary with the CUDA
5489fa9d9a7SFabian Mora // runtime.
5499fa9d9a7SFabian Mora void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
5509fa9d9a7SFabian Mora                                   bool IsHIP, EntryArrayTy EntryArray,
5519fa9d9a7SFabian Mora                                   StringRef Suffix,
5529fa9d9a7SFabian Mora                                   bool EmitSurfacesAndTextures) {
5539fa9d9a7SFabian Mora   LLVMContext &C = M.getContext();
5549fa9d9a7SFabian Mora   auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
5559fa9d9a7SFabian Mora   auto *CtorFunc = Function::Create(
5569fa9d9a7SFabian Mora       CtorFuncTy, GlobalValue::InternalLinkage,
5579fa9d9a7SFabian Mora       (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
5589fa9d9a7SFabian Mora   CtorFunc->setSection(".text.startup");
5599fa9d9a7SFabian Mora 
5609fa9d9a7SFabian Mora   auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
5619fa9d9a7SFabian Mora   auto *DtorFunc = Function::Create(
5629fa9d9a7SFabian Mora       DtorFuncTy, GlobalValue::InternalLinkage,
5639fa9d9a7SFabian Mora       (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
5649fa9d9a7SFabian Mora   DtorFunc->setSection(".text.startup");
5659fa9d9a7SFabian Mora 
5669fa9d9a7SFabian Mora   auto *PtrTy = PointerType::getUnqual(C);
5679fa9d9a7SFabian Mora 
5689fa9d9a7SFabian Mora   // Get the __cudaRegisterFatBinary function declaration.
5699fa9d9a7SFabian Mora   auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false);
5709fa9d9a7SFabian Mora   FunctionCallee RegFatbin = M.getOrInsertFunction(
5719fa9d9a7SFabian Mora       IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
5729fa9d9a7SFabian Mora   // Get the __cudaRegisterFatBinaryEnd function declaration.
5739fa9d9a7SFabian Mora   auto *RegFatEndTy =
5749fa9d9a7SFabian Mora       FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
5759fa9d9a7SFabian Mora   FunctionCallee RegFatbinEnd =
5769fa9d9a7SFabian Mora       M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy);
5779fa9d9a7SFabian Mora   // Get the __cudaUnregisterFatBinary function declaration.
5789fa9d9a7SFabian Mora   auto *UnregFatTy =
5799fa9d9a7SFabian Mora       FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
5809fa9d9a7SFabian Mora   FunctionCallee UnregFatbin = M.getOrInsertFunction(
5819fa9d9a7SFabian Mora       IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
5829fa9d9a7SFabian Mora       UnregFatTy);
5839fa9d9a7SFabian Mora 
5849fa9d9a7SFabian Mora   auto *AtExitTy =
5859fa9d9a7SFabian Mora       FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false);
5869fa9d9a7SFabian Mora   FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
5879fa9d9a7SFabian Mora 
5889fa9d9a7SFabian Mora   auto *BinaryHandleGlobal = new llvm::GlobalVariable(
5899fa9d9a7SFabian Mora       M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
5909fa9d9a7SFabian Mora       llvm::ConstantPointerNull::get(PtrTy),
5919fa9d9a7SFabian Mora       (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix);
5929fa9d9a7SFabian Mora 
5939fa9d9a7SFabian Mora   // Create the constructor to register this image with the runtime.
5949fa9d9a7SFabian Mora   IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
5959fa9d9a7SFabian Mora   CallInst *Handle = CtorBuilder.CreateCall(
5969fa9d9a7SFabian Mora       RegFatbin,
5979fa9d9a7SFabian Mora       ConstantExpr::getPointerBitCastOrAddrSpaceCast(FatbinDesc, PtrTy));
5989fa9d9a7SFabian Mora   CtorBuilder.CreateAlignedStore(
5999fa9d9a7SFabian Mora       Handle, BinaryHandleGlobal,
6009fa9d9a7SFabian Mora       Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
6019fa9d9a7SFabian Mora   CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
6029fa9d9a7SFabian Mora                                                        Suffix,
6039fa9d9a7SFabian Mora                                                        EmitSurfacesAndTextures),
6049fa9d9a7SFabian Mora                          Handle);
6059fa9d9a7SFabian Mora   if (!IsHIP)
6069fa9d9a7SFabian Mora     CtorBuilder.CreateCall(RegFatbinEnd, Handle);
6079fa9d9a7SFabian Mora   CtorBuilder.CreateCall(AtExit, DtorFunc);
6089fa9d9a7SFabian Mora   CtorBuilder.CreateRetVoid();
6099fa9d9a7SFabian Mora 
6109fa9d9a7SFabian Mora   // Create the destructor to unregister the image with the runtime. We cannot
6119fa9d9a7SFabian Mora   // use a standard global destructor after CUDA 9.2 so this must be called by
612d6fc7d3aSJay Foad   // `atexit()` instead.
6139fa9d9a7SFabian Mora   IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc));
6149fa9d9a7SFabian Mora   LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
6159fa9d9a7SFabian Mora       PtrTy, BinaryHandleGlobal,
6169fa9d9a7SFabian Mora       Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
6179fa9d9a7SFabian Mora   DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
6189fa9d9a7SFabian Mora   DtorBuilder.CreateRetVoid();
6199fa9d9a7SFabian Mora 
6209fa9d9a7SFabian Mora   // Add this function to constructors.
621421085fdSJoseph Huber   appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
6229fa9d9a7SFabian Mora }
6239fa9d9a7SFabian Mora } // namespace
6249fa9d9a7SFabian Mora 
6259fa9d9a7SFabian Mora Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images,
6269fa9d9a7SFabian Mora                                      EntryArrayTy EntryArray,
6275c840542SJoseph Huber                                      llvm::StringRef Suffix, bool Relocatable) {
6285c840542SJoseph Huber   GlobalVariable *Desc =
6295c840542SJoseph Huber       createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
6309fa9d9a7SFabian Mora   if (!Desc)
6319fa9d9a7SFabian Mora     return createStringError(inconvertibleErrorCode(),
6329fa9d9a7SFabian Mora                              "No binary descriptors created.");
6339fa9d9a7SFabian Mora   createRegisterFunction(M, Desc, Suffix);
6349fa9d9a7SFabian Mora   return Error::success();
6359fa9d9a7SFabian Mora }
6369fa9d9a7SFabian Mora 
6379fa9d9a7SFabian Mora Error offloading::wrapCudaBinary(Module &M, ArrayRef<char> Image,
6389fa9d9a7SFabian Mora                                  EntryArrayTy EntryArray,
6399fa9d9a7SFabian Mora                                  llvm::StringRef Suffix,
6409fa9d9a7SFabian Mora                                  bool EmitSurfacesAndTextures) {
6419fa9d9a7SFabian Mora   GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix);
6429fa9d9a7SFabian Mora   if (!Desc)
6439fa9d9a7SFabian Mora     return createStringError(inconvertibleErrorCode(),
6449fa9d9a7SFabian Mora                              "No fatbin section created.");
6459fa9d9a7SFabian Mora 
6469fa9d9a7SFabian Mora   createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix,
6479fa9d9a7SFabian Mora                                EmitSurfacesAndTextures);
6489fa9d9a7SFabian Mora   return Error::success();
6499fa9d9a7SFabian Mora }
6509fa9d9a7SFabian Mora 
6519fa9d9a7SFabian Mora Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image,
6529fa9d9a7SFabian Mora                                 EntryArrayTy EntryArray, llvm::StringRef Suffix,
6539fa9d9a7SFabian Mora                                 bool EmitSurfacesAndTextures) {
6549fa9d9a7SFabian Mora   GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix);
6559fa9d9a7SFabian Mora   if (!Desc)
6569fa9d9a7SFabian Mora     return createStringError(inconvertibleErrorCode(),
6579fa9d9a7SFabian Mora                              "No fatbin section created.");
6589fa9d9a7SFabian Mora 
6599fa9d9a7SFabian Mora   createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix,
6609fa9d9a7SFabian Mora                                EmitSurfacesAndTextures);
6619fa9d9a7SFabian Mora   return Error::success();
6629fa9d9a7SFabian Mora }
663