1*06c3fb27SDimitry Andric //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===// 2*06c3fb27SDimitry Andric // 3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 8*06c3fb27SDimitry Andric // 9*06c3fb27SDimitry Andric // This file implements offloading to CUDA devices. 10*06c3fb27SDimitry Andric // 11*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 12*06c3fb27SDimitry Andric 13*06c3fb27SDimitry Andric #include "DeviceOffload.h" 14*06c3fb27SDimitry Andric 15*06c3fb27SDimitry Andric #include "clang/Basic/TargetOptions.h" 16*06c3fb27SDimitry Andric #include "clang/CodeGen/ModuleBuilder.h" 17*06c3fb27SDimitry Andric #include "clang/Frontend/CompilerInstance.h" 18*06c3fb27SDimitry Andric 19*06c3fb27SDimitry Andric #include "llvm/IR/LegacyPassManager.h" 20*06c3fb27SDimitry Andric #include "llvm/MC/TargetRegistry.h" 21*06c3fb27SDimitry Andric #include "llvm/Target/TargetMachine.h" 22*06c3fb27SDimitry Andric 23*06c3fb27SDimitry Andric namespace clang { 24*06c3fb27SDimitry Andric 25*06c3fb27SDimitry Andric IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( 26*06c3fb27SDimitry Andric Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance, 27*06c3fb27SDimitry Andric IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx, 28*06c3fb27SDimitry Andric llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, 29*06c3fb27SDimitry Andric llvm::Error &Err) 30*06c3fb27SDimitry Andric : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err), 31*06c3fb27SDimitry Andric HostParser(HostParser), VFS(FS) { 32*06c3fb27SDimitry 
Andric if (Err) 33*06c3fb27SDimitry Andric return; 34*06c3fb27SDimitry Andric StringRef Arch = CI->getTargetOpts().CPU; 35*06c3fb27SDimitry Andric if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { 36*06c3fb27SDimitry Andric Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( 37*06c3fb27SDimitry Andric "Invalid CUDA architecture", 38*06c3fb27SDimitry Andric llvm::inconvertibleErrorCode())); 39*06c3fb27SDimitry Andric return; 40*06c3fb27SDimitry Andric } 41*06c3fb27SDimitry Andric } 42*06c3fb27SDimitry Andric 43*06c3fb27SDimitry Andric llvm::Expected<PartialTranslationUnit &> 44*06c3fb27SDimitry Andric IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { 45*06c3fb27SDimitry Andric auto PTU = IncrementalParser::Parse(Input); 46*06c3fb27SDimitry Andric if (!PTU) 47*06c3fb27SDimitry Andric return PTU.takeError(); 48*06c3fb27SDimitry Andric 49*06c3fb27SDimitry Andric auto PTX = GeneratePTX(); 50*06c3fb27SDimitry Andric if (!PTX) 51*06c3fb27SDimitry Andric return PTX.takeError(); 52*06c3fb27SDimitry Andric 53*06c3fb27SDimitry Andric auto Err = GenerateFatbinary(); 54*06c3fb27SDimitry Andric if (Err) 55*06c3fb27SDimitry Andric return std::move(Err); 56*06c3fb27SDimitry Andric 57*06c3fb27SDimitry Andric std::string FatbinFileName = 58*06c3fb27SDimitry Andric "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; 59*06c3fb27SDimitry Andric VFS->addFile(FatbinFileName, 0, 60*06c3fb27SDimitry Andric llvm::MemoryBuffer::getMemBuffer( 61*06c3fb27SDimitry Andric llvm::StringRef(FatbinContent.data(), FatbinContent.size()), 62*06c3fb27SDimitry Andric "", false)); 63*06c3fb27SDimitry Andric 64*06c3fb27SDimitry Andric HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName; 65*06c3fb27SDimitry Andric 66*06c3fb27SDimitry Andric FatbinContent.clear(); 67*06c3fb27SDimitry Andric 68*06c3fb27SDimitry Andric return PTU; 69*06c3fb27SDimitry Andric } 70*06c3fb27SDimitry Andric 71*06c3fb27SDimitry Andric 
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { 72*06c3fb27SDimitry Andric auto &PTU = PTUs.back(); 73*06c3fb27SDimitry Andric std::string Error; 74*06c3fb27SDimitry Andric 75*06c3fb27SDimitry Andric const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( 76*06c3fb27SDimitry Andric PTU.TheModule->getTargetTriple(), Error); 77*06c3fb27SDimitry Andric if (!Target) 78*06c3fb27SDimitry Andric return llvm::make_error<llvm::StringError>(std::move(Error), 79*06c3fb27SDimitry Andric std::error_code()); 80*06c3fb27SDimitry Andric llvm::TargetOptions TO = llvm::TargetOptions(); 81*06c3fb27SDimitry Andric llvm::TargetMachine *TargetMachine = Target->createTargetMachine( 82*06c3fb27SDimitry Andric PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO, 83*06c3fb27SDimitry Andric llvm::Reloc::Model::PIC_); 84*06c3fb27SDimitry Andric PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); 85*06c3fb27SDimitry Andric 86*06c3fb27SDimitry Andric PTXCode.clear(); 87*06c3fb27SDimitry Andric llvm::raw_svector_ostream dest(PTXCode); 88*06c3fb27SDimitry Andric 89*06c3fb27SDimitry Andric llvm::legacy::PassManager PM; 90*06c3fb27SDimitry Andric if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr, 91*06c3fb27SDimitry Andric llvm::CGFT_AssemblyFile)) { 92*06c3fb27SDimitry Andric return llvm::make_error<llvm::StringError>( 93*06c3fb27SDimitry Andric "NVPTX backend cannot produce PTX code.", 94*06c3fb27SDimitry Andric llvm::inconvertibleErrorCode()); 95*06c3fb27SDimitry Andric } 96*06c3fb27SDimitry Andric 97*06c3fb27SDimitry Andric if (!PM.run(*PTU.TheModule)) 98*06c3fb27SDimitry Andric return llvm::make_error<llvm::StringError>("Failed to emit PTX code.", 99*06c3fb27SDimitry Andric llvm::inconvertibleErrorCode()); 100*06c3fb27SDimitry Andric 101*06c3fb27SDimitry Andric PTXCode += '\0'; 102*06c3fb27SDimitry Andric while (PTXCode.size() % 8) 103*06c3fb27SDimitry Andric PTXCode += '\0'; 104*06c3fb27SDimitry Andric return 
PTXCode.str(); 105*06c3fb27SDimitry Andric } 106*06c3fb27SDimitry Andric 107*06c3fb27SDimitry Andric llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { 108*06c3fb27SDimitry Andric enum FatBinFlags { 109*06c3fb27SDimitry Andric AddressSize64 = 0x01, 110*06c3fb27SDimitry Andric HasDebugInfo = 0x02, 111*06c3fb27SDimitry Andric ProducerCuda = 0x04, 112*06c3fb27SDimitry Andric HostLinux = 0x10, 113*06c3fb27SDimitry Andric HostMac = 0x20, 114*06c3fb27SDimitry Andric HostWindows = 0x40 115*06c3fb27SDimitry Andric }; 116*06c3fb27SDimitry Andric 117*06c3fb27SDimitry Andric struct FatBinInnerHeader { 118*06c3fb27SDimitry Andric uint16_t Kind; // 0x00 119*06c3fb27SDimitry Andric uint16_t unknown02; // 0x02 120*06c3fb27SDimitry Andric uint32_t HeaderSize; // 0x04 121*06c3fb27SDimitry Andric uint32_t DataSize; // 0x08 122*06c3fb27SDimitry Andric uint32_t unknown0c; // 0x0c 123*06c3fb27SDimitry Andric uint32_t CompressedSize; // 0x10 124*06c3fb27SDimitry Andric uint32_t SubHeaderSize; // 0x14 125*06c3fb27SDimitry Andric uint16_t VersionMinor; // 0x18 126*06c3fb27SDimitry Andric uint16_t VersionMajor; // 0x1a 127*06c3fb27SDimitry Andric uint32_t CudaArch; // 0x1c 128*06c3fb27SDimitry Andric uint32_t unknown20; // 0x20 129*06c3fb27SDimitry Andric uint32_t unknown24; // 0x24 130*06c3fb27SDimitry Andric uint32_t Flags; // 0x28 131*06c3fb27SDimitry Andric uint32_t unknown2c; // 0x2c 132*06c3fb27SDimitry Andric uint32_t unknown30; // 0x30 133*06c3fb27SDimitry Andric uint32_t unknown34; // 0x34 134*06c3fb27SDimitry Andric uint32_t UncompressedSize; // 0x38 135*06c3fb27SDimitry Andric uint32_t unknown3c; // 0x3c 136*06c3fb27SDimitry Andric uint32_t unknown40; // 0x40 137*06c3fb27SDimitry Andric uint32_t unknown44; // 0x44 138*06c3fb27SDimitry Andric FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags) 139*06c3fb27SDimitry Andric : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)), 140*06c3fb27SDimitry Andric DataSize(DataSize), unknown0c(0), 
CompressedSize(0), 141*06c3fb27SDimitry Andric SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4), 142*06c3fb27SDimitry Andric CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags), 143*06c3fb27SDimitry Andric unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0), 144*06c3fb27SDimitry Andric unknown3c(0), unknown40(0), unknown44(0) {} 145*06c3fb27SDimitry Andric }; 146*06c3fb27SDimitry Andric 147*06c3fb27SDimitry Andric struct FatBinHeader { 148*06c3fb27SDimitry Andric uint32_t Magic; // 0x00 149*06c3fb27SDimitry Andric uint16_t Version; // 0x04 150*06c3fb27SDimitry Andric uint16_t HeaderSize; // 0x06 151*06c3fb27SDimitry Andric uint32_t DataSize; // 0x08 152*06c3fb27SDimitry Andric uint32_t unknown0c; // 0x0c 153*06c3fb27SDimitry Andric public: 154*06c3fb27SDimitry Andric FatBinHeader(uint32_t DataSize) 155*06c3fb27SDimitry Andric : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)), 156*06c3fb27SDimitry Andric DataSize(DataSize), unknown0c(0) {} 157*06c3fb27SDimitry Andric }; 158*06c3fb27SDimitry Andric 159*06c3fb27SDimitry Andric FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size()); 160*06c3fb27SDimitry Andric FatbinContent.append((char *)&OuterHeader, 161*06c3fb27SDimitry Andric ((char *)&OuterHeader) + OuterHeader.HeaderSize); 162*06c3fb27SDimitry Andric 163*06c3fb27SDimitry Andric FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion, 164*06c3fb27SDimitry Andric FatBinFlags::AddressSize64 | 165*06c3fb27SDimitry Andric FatBinFlags::HostLinux); 166*06c3fb27SDimitry Andric FatbinContent.append((char *)&InnerHeader, 167*06c3fb27SDimitry Andric ((char *)&InnerHeader) + InnerHeader.HeaderSize); 168*06c3fb27SDimitry Andric 169*06c3fb27SDimitry Andric FatbinContent.append(PTXCode.begin(), PTXCode.end()); 170*06c3fb27SDimitry Andric 171*06c3fb27SDimitry Andric return llvm::Error::success(); 172*06c3fb27SDimitry Andric } 173*06c3fb27SDimitry Andric 174*06c3fb27SDimitry Andric 
/// Out-of-line destructor; performs no work beyond destroying the members and
/// the base class.
IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() = default;

} // namespace clang