1ddeab07cSAnubhab Ghosh //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===// 2ddeab07cSAnubhab Ghosh // 3ddeab07cSAnubhab Ghosh // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ddeab07cSAnubhab Ghosh // See https://llvm.org/LICENSE.txt for license information. 5ddeab07cSAnubhab Ghosh // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ddeab07cSAnubhab Ghosh // 7ddeab07cSAnubhab Ghosh //===----------------------------------------------------------------------===// 8ddeab07cSAnubhab Ghosh // 9ddeab07cSAnubhab Ghosh // This file implements offloading to CUDA devices. 10ddeab07cSAnubhab Ghosh // 11ddeab07cSAnubhab Ghosh //===----------------------------------------------------------------------===// 12ddeab07cSAnubhab Ghosh 13ddeab07cSAnubhab Ghosh #include "DeviceOffload.h" 14ddeab07cSAnubhab Ghosh 15ddeab07cSAnubhab Ghosh #include "clang/Basic/TargetOptions.h" 16ddeab07cSAnubhab Ghosh #include "clang/CodeGen/ModuleBuilder.h" 17ddeab07cSAnubhab Ghosh #include "clang/Frontend/CompilerInstance.h" 18*a72d7eeaSVassil Vassilev #include "clang/Interpreter/PartialTranslationUnit.h" 19ddeab07cSAnubhab Ghosh 20ddeab07cSAnubhab Ghosh #include "llvm/IR/LegacyPassManager.h" 214169338eSNikita Popov #include "llvm/IR/Module.h" 22ddeab07cSAnubhab Ghosh #include "llvm/MC/TargetRegistry.h" 23ddeab07cSAnubhab Ghosh #include "llvm/Target/TargetMachine.h" 24ddeab07cSAnubhab Ghosh 25ddeab07cSAnubhab Ghosh namespace clang { 26ddeab07cSAnubhab Ghosh 27ddeab07cSAnubhab Ghosh IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( 28*a72d7eeaSVassil Vassilev std::unique_ptr<CompilerInstance> DeviceInstance, 29*a72d7eeaSVassil Vassilev CompilerInstance &HostInstance, 30ddeab07cSAnubhab Ghosh llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, 31*a72d7eeaSVassil Vassilev llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs) 32*a72d7eeaSVassil Vassilev : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS), 33*a72d7eeaSVassil Vassilev CodeGenOpts(HostInstance.getCodeGenOpts()), 34*a72d7eeaSVassil Vassilev TargetOpts(HostInstance.getTargetOpts()) { 35ddeab07cSAnubhab Ghosh if (Err) 36ddeab07cSAnubhab Ghosh return; 37*a72d7eeaSVassil Vassilev DeviceCI = std::move(DeviceInstance); 38*a72d7eeaSVassil Vassilev StringRef Arch = TargetOpts.CPU; 39ddeab07cSAnubhab Ghosh if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { 40ddeab07cSAnubhab Ghosh Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( 41ddeab07cSAnubhab Ghosh "Invalid CUDA architecture", 42ddeab07cSAnubhab Ghosh llvm::inconvertibleErrorCode())); 43ddeab07cSAnubhab Ghosh return; 44ddeab07cSAnubhab Ghosh } 45ddeab07cSAnubhab Ghosh } 46ddeab07cSAnubhab Ghosh 47*a72d7eeaSVassil Vassilev llvm::Expected<TranslationUnitDecl *> 48ddeab07cSAnubhab Ghosh IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { 49ddeab07cSAnubhab Ghosh auto PTU = IncrementalParser::Parse(Input); 50ddeab07cSAnubhab Ghosh if (!PTU) 51ddeab07cSAnubhab Ghosh return PTU.takeError(); 52ddeab07cSAnubhab Ghosh 53ddeab07cSAnubhab Ghosh auto PTX = GeneratePTX(); 54ddeab07cSAnubhab Ghosh if (!PTX) 55ddeab07cSAnubhab Ghosh return PTX.takeError(); 56ddeab07cSAnubhab Ghosh 57ddeab07cSAnubhab Ghosh auto Err = GenerateFatbinary(); 58ddeab07cSAnubhab Ghosh if (Err) 59ddeab07cSAnubhab Ghosh return std::move(Err); 60ddeab07cSAnubhab Ghosh 61ddeab07cSAnubhab Ghosh std::string FatbinFileName = 62ddeab07cSAnubhab Ghosh "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; 63ddeab07cSAnubhab Ghosh VFS->addFile(FatbinFileName, 0, 64ddeab07cSAnubhab Ghosh llvm::MemoryBuffer::getMemBuffer( 65ddeab07cSAnubhab Ghosh llvm::StringRef(FatbinContent.data(), FatbinContent.size()), 66ddeab07cSAnubhab Ghosh "", false)); 67ddeab07cSAnubhab Ghosh 68*a72d7eeaSVassil Vassilev CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; 69ddeab07cSAnubhab Ghosh 70ddeab07cSAnubhab Ghosh FatbinContent.clear(); 71ddeab07cSAnubhab Ghosh 72ddeab07cSAnubhab Ghosh return PTU; 73ddeab07cSAnubhab Ghosh } 74ddeab07cSAnubhab Ghosh 75ddeab07cSAnubhab Ghosh llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { 76ddeab07cSAnubhab Ghosh auto &PTU = PTUs.back(); 77ddeab07cSAnubhab Ghosh std::string Error; 78ddeab07cSAnubhab Ghosh 79ddeab07cSAnubhab Ghosh const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( 80ddeab07cSAnubhab Ghosh PTU.TheModule->getTargetTriple(), Error); 81ddeab07cSAnubhab Ghosh if (!Target) 82ddeab07cSAnubhab Ghosh return llvm::make_error<llvm::StringError>(std::move(Error), 83ddeab07cSAnubhab Ghosh std::error_code()); 84ddeab07cSAnubhab Ghosh llvm::TargetOptions TO = llvm::TargetOptions(); 85ddeab07cSAnubhab Ghosh llvm::TargetMachine *TargetMachine = Target->createTargetMachine( 86*a72d7eeaSVassil Vassilev PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO, 87ddeab07cSAnubhab Ghosh llvm::Reloc::Model::PIC_); 88ddeab07cSAnubhab Ghosh PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); 89ddeab07cSAnubhab Ghosh 90ddeab07cSAnubhab Ghosh PTXCode.clear(); 91ddeab07cSAnubhab Ghosh llvm::raw_svector_ostream dest(PTXCode); 92ddeab07cSAnubhab Ghosh 93ddeab07cSAnubhab Ghosh llvm::legacy::PassManager PM; 94ddeab07cSAnubhab Ghosh if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr, 950a1aa6cdSArthur Eubanks llvm::CodeGenFileType::AssemblyFile)) { 96ddeab07cSAnubhab Ghosh return llvm::make_error<llvm::StringError>( 97ddeab07cSAnubhab Ghosh "NVPTX backend cannot produce PTX code.", 98ddeab07cSAnubhab Ghosh llvm::inconvertibleErrorCode()); 99ddeab07cSAnubhab Ghosh } 100ddeab07cSAnubhab Ghosh 101ddeab07cSAnubhab Ghosh if (!PM.run(*PTU.TheModule)) 102ddeab07cSAnubhab Ghosh return llvm::make_error<llvm::StringError>("Failed to emit PTX code.", 103ddeab07cSAnubhab Ghosh llvm::inconvertibleErrorCode()); 104ddeab07cSAnubhab Ghosh 105ddeab07cSAnubhab Ghosh PTXCode += '\0'; 106ddeab07cSAnubhab Ghosh while (PTXCode.size() % 8) 107ddeab07cSAnubhab Ghosh PTXCode += '\0'; 108ddeab07cSAnubhab Ghosh return PTXCode.str(); 109ddeab07cSAnubhab Ghosh } 110ddeab07cSAnubhab Ghosh 111ddeab07cSAnubhab Ghosh llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { 112ddeab07cSAnubhab Ghosh enum FatBinFlags { 113ddeab07cSAnubhab Ghosh AddressSize64 = 0x01, 114ddeab07cSAnubhab Ghosh HasDebugInfo = 0x02, 115ddeab07cSAnubhab Ghosh ProducerCuda = 0x04, 116ddeab07cSAnubhab Ghosh HostLinux = 0x10, 117ddeab07cSAnubhab Ghosh HostMac = 0x20, 118ddeab07cSAnubhab Ghosh HostWindows = 0x40 119ddeab07cSAnubhab Ghosh }; 120ddeab07cSAnubhab Ghosh 121ddeab07cSAnubhab Ghosh struct FatBinInnerHeader { 122ddeab07cSAnubhab Ghosh uint16_t Kind; // 0x00 123ddeab07cSAnubhab Ghosh uint16_t unknown02; // 0x02 124ddeab07cSAnubhab Ghosh uint32_t HeaderSize; // 0x04 125ddeab07cSAnubhab Ghosh uint32_t DataSize; // 0x08 126ddeab07cSAnubhab Ghosh uint32_t unknown0c; // 0x0c 127ddeab07cSAnubhab Ghosh uint32_t CompressedSize; // 0x10 128ddeab07cSAnubhab Ghosh uint32_t SubHeaderSize; // 0x14 129ddeab07cSAnubhab Ghosh uint16_t VersionMinor; // 0x18 130ddeab07cSAnubhab Ghosh uint16_t VersionMajor; // 0x1a 131ddeab07cSAnubhab Ghosh uint32_t CudaArch; // 0x1c 132ddeab07cSAnubhab Ghosh uint32_t unknown20; // 0x20 133ddeab07cSAnubhab Ghosh uint32_t unknown24; // 0x24 134ddeab07cSAnubhab Ghosh uint32_t Flags; // 0x28 135ddeab07cSAnubhab Ghosh uint32_t unknown2c; // 0x2c 136ddeab07cSAnubhab Ghosh uint32_t unknown30; // 0x30 137ddeab07cSAnubhab Ghosh uint32_t unknown34; // 0x34 138ddeab07cSAnubhab Ghosh uint32_t UncompressedSize; // 0x38 139ddeab07cSAnubhab Ghosh uint32_t unknown3c; // 0x3c 140ddeab07cSAnubhab Ghosh uint32_t unknown40; // 0x40 141ddeab07cSAnubhab Ghosh uint32_t unknown44; // 0x44 142ddeab07cSAnubhab Ghosh FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags) 143ddeab07cSAnubhab Ghosh : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)), 144ddeab07cSAnubhab Ghosh DataSize(DataSize), unknown0c(0), CompressedSize(0), 145ddeab07cSAnubhab Ghosh SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4), 146ddeab07cSAnubhab Ghosh CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags), 147ddeab07cSAnubhab Ghosh unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0), 148ddeab07cSAnubhab Ghosh unknown3c(0), unknown40(0), unknown44(0) {} 149ddeab07cSAnubhab Ghosh }; 150ddeab07cSAnubhab Ghosh 151ddeab07cSAnubhab Ghosh struct FatBinHeader { 152ddeab07cSAnubhab Ghosh uint32_t Magic; // 0x00 153ddeab07cSAnubhab Ghosh uint16_t Version; // 0x04 154ddeab07cSAnubhab Ghosh uint16_t HeaderSize; // 0x06 155ddeab07cSAnubhab Ghosh uint32_t DataSize; // 0x08 156ddeab07cSAnubhab Ghosh uint32_t unknown0c; // 0x0c 157ddeab07cSAnubhab Ghosh public: 158ddeab07cSAnubhab Ghosh FatBinHeader(uint32_t DataSize) 159ddeab07cSAnubhab Ghosh : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)), 160ddeab07cSAnubhab Ghosh DataSize(DataSize), unknown0c(0) {} 161ddeab07cSAnubhab Ghosh }; 162ddeab07cSAnubhab Ghosh 163ddeab07cSAnubhab Ghosh FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size()); 164ddeab07cSAnubhab Ghosh FatbinContent.append((char *)&OuterHeader, 165ddeab07cSAnubhab Ghosh ((char *)&OuterHeader) + OuterHeader.HeaderSize); 166ddeab07cSAnubhab Ghosh 167ddeab07cSAnubhab Ghosh FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion, 168ddeab07cSAnubhab Ghosh FatBinFlags::AddressSize64 | 169ddeab07cSAnubhab Ghosh FatBinFlags::HostLinux); 170ddeab07cSAnubhab Ghosh FatbinContent.append((char *)&InnerHeader, 171ddeab07cSAnubhab Ghosh ((char *)&InnerHeader) + InnerHeader.HeaderSize); 172ddeab07cSAnubhab Ghosh 173ddeab07cSAnubhab Ghosh FatbinContent.append(PTXCode.begin(), PTXCode.end()); 174ddeab07cSAnubhab Ghosh 175ddeab07cSAnubhab Ghosh return llvm::Error::success(); 176ddeab07cSAnubhab Ghosh } 177ddeab07cSAnubhab Ghosh 178ddeab07cSAnubhab Ghosh IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {} 179ddeab07cSAnubhab Ghosh 180ddeab07cSAnubhab Ghosh } // namespace clang 181