106c3fb27SDimitry Andric //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements offloading to CUDA devices. 1006c3fb27SDimitry Andric // 1106c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1206c3fb27SDimitry Andric 1306c3fb27SDimitry Andric #include "DeviceOffload.h" 1406c3fb27SDimitry Andric 1506c3fb27SDimitry Andric #include "clang/Basic/TargetOptions.h" 1606c3fb27SDimitry Andric #include "clang/CodeGen/ModuleBuilder.h" 1706c3fb27SDimitry Andric #include "clang/Frontend/CompilerInstance.h" 1806c3fb27SDimitry Andric 1906c3fb27SDimitry Andric #include "llvm/IR/LegacyPassManager.h" 20*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h" 2106c3fb27SDimitry Andric #include "llvm/MC/TargetRegistry.h" 2206c3fb27SDimitry Andric #include "llvm/Target/TargetMachine.h" 2306c3fb27SDimitry Andric 2406c3fb27SDimitry Andric namespace clang { 2506c3fb27SDimitry Andric 2606c3fb27SDimitry Andric IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( 2706c3fb27SDimitry Andric Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance, 2806c3fb27SDimitry Andric IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx, 2906c3fb27SDimitry Andric llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, 3006c3fb27SDimitry Andric llvm::Error &Err) 3106c3fb27SDimitry Andric : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err), 3206c3fb27SDimitry Andric HostParser(HostParser), VFS(FS) { 3306c3fb27SDimitry Andric if (Err) 3406c3fb27SDimitry Andric return; 3506c3fb27SDimitry Andric StringRef Arch = CI->getTargetOpts().CPU; 3606c3fb27SDimitry Andric if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { 3706c3fb27SDimitry Andric Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( 3806c3fb27SDimitry Andric "Invalid CUDA architecture", 3906c3fb27SDimitry Andric llvm::inconvertibleErrorCode())); 4006c3fb27SDimitry Andric return; 4106c3fb27SDimitry Andric } 4206c3fb27SDimitry Andric } 4306c3fb27SDimitry Andric 4406c3fb27SDimitry Andric llvm::Expected<PartialTranslationUnit &> 4506c3fb27SDimitry Andric IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { 4606c3fb27SDimitry Andric auto PTU = IncrementalParser::Parse(Input); 4706c3fb27SDimitry Andric if (!PTU) 4806c3fb27SDimitry Andric return PTU.takeError(); 4906c3fb27SDimitry Andric 5006c3fb27SDimitry Andric auto PTX = GeneratePTX(); 5106c3fb27SDimitry Andric if (!PTX) 5206c3fb27SDimitry Andric return PTX.takeError(); 5306c3fb27SDimitry Andric 5406c3fb27SDimitry Andric auto Err = GenerateFatbinary(); 5506c3fb27SDimitry Andric if (Err) 5606c3fb27SDimitry Andric return std::move(Err); 5706c3fb27SDimitry Andric 5806c3fb27SDimitry Andric std::string FatbinFileName = 5906c3fb27SDimitry Andric "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; 6006c3fb27SDimitry Andric VFS->addFile(FatbinFileName, 0, 6106c3fb27SDimitry Andric llvm::MemoryBuffer::getMemBuffer( 6206c3fb27SDimitry Andric llvm::StringRef(FatbinContent.data(), FatbinContent.size()), 6306c3fb27SDimitry Andric "", false)); 6406c3fb27SDimitry Andric 6506c3fb27SDimitry Andric HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName; 6606c3fb27SDimitry Andric 6706c3fb27SDimitry Andric FatbinContent.clear(); 6806c3fb27SDimitry Andric 6906c3fb27SDimitry Andric return PTU; 7006c3fb27SDimitry Andric } 7106c3fb27SDimitry Andric 7206c3fb27SDimitry Andric llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { 7306c3fb27SDimitry Andric auto &PTU = PTUs.back(); 7406c3fb27SDimitry Andric std::string Error; 7506c3fb27SDimitry Andric 7606c3fb27SDimitry Andric const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( 7706c3fb27SDimitry Andric PTU.TheModule->getTargetTriple(), Error); 7806c3fb27SDimitry Andric if (!Target) 7906c3fb27SDimitry Andric return llvm::make_error<llvm::StringError>(std::move(Error), 8006c3fb27SDimitry Andric std::error_code()); 8106c3fb27SDimitry Andric llvm::TargetOptions TO = llvm::TargetOptions(); 8206c3fb27SDimitry Andric llvm::TargetMachine *TargetMachine = Target->createTargetMachine( 8306c3fb27SDimitry Andric PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO, 8406c3fb27SDimitry Andric llvm::Reloc::Model::PIC_); 8506c3fb27SDimitry Andric PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); 8606c3fb27SDimitry Andric 8706c3fb27SDimitry Andric PTXCode.clear(); 8806c3fb27SDimitry Andric llvm::raw_svector_ostream dest(PTXCode); 8906c3fb27SDimitry Andric 9006c3fb27SDimitry Andric llvm::legacy::PassManager PM; 9106c3fb27SDimitry Andric if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr, 925f757f3fSDimitry Andric llvm::CodeGenFileType::AssemblyFile)) { 9306c3fb27SDimitry Andric return llvm::make_error<llvm::StringError>( 9406c3fb27SDimitry Andric "NVPTX backend cannot produce PTX code.", 9506c3fb27SDimitry Andric llvm::inconvertibleErrorCode()); 9606c3fb27SDimitry Andric } 9706c3fb27SDimitry Andric 9806c3fb27SDimitry Andric if (!PM.run(*PTU.TheModule)) 9906c3fb27SDimitry Andric return llvm::make_error<llvm::StringError>("Failed to emit PTX code.", 10006c3fb27SDimitry Andric llvm::inconvertibleErrorCode()); 10106c3fb27SDimitry Andric 10206c3fb27SDimitry Andric PTXCode += '\0'; 10306c3fb27SDimitry Andric while (PTXCode.size() % 8) 10406c3fb27SDimitry Andric PTXCode += '\0'; 10506c3fb27SDimitry Andric return PTXCode.str(); 10606c3fb27SDimitry Andric } 10706c3fb27SDimitry Andric 10806c3fb27SDimitry Andric llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { 10906c3fb27SDimitry Andric enum FatBinFlags { 11006c3fb27SDimitry Andric AddressSize64 = 0x01, 11106c3fb27SDimitry Andric HasDebugInfo = 0x02, 11206c3fb27SDimitry Andric ProducerCuda = 0x04, 11306c3fb27SDimitry Andric HostLinux = 0x10, 11406c3fb27SDimitry Andric HostMac = 0x20, 11506c3fb27SDimitry Andric HostWindows = 0x40 11606c3fb27SDimitry Andric }; 11706c3fb27SDimitry Andric 11806c3fb27SDimitry Andric struct FatBinInnerHeader { 11906c3fb27SDimitry Andric uint16_t Kind; // 0x00 12006c3fb27SDimitry Andric uint16_t unknown02; // 0x02 12106c3fb27SDimitry Andric uint32_t HeaderSize; // 0x04 12206c3fb27SDimitry Andric uint32_t DataSize; // 0x08 12306c3fb27SDimitry Andric uint32_t unknown0c; // 0x0c 12406c3fb27SDimitry Andric uint32_t CompressedSize; // 0x10 12506c3fb27SDimitry Andric uint32_t SubHeaderSize; // 0x14 12606c3fb27SDimitry Andric uint16_t VersionMinor; // 0x18 12706c3fb27SDimitry Andric uint16_t VersionMajor; // 0x1a 12806c3fb27SDimitry Andric uint32_t CudaArch; // 0x1c 12906c3fb27SDimitry Andric uint32_t unknown20; // 0x20 13006c3fb27SDimitry Andric uint32_t unknown24; // 0x24 13106c3fb27SDimitry Andric uint32_t Flags; // 0x28 13206c3fb27SDimitry Andric uint32_t unknown2c; // 0x2c 13306c3fb27SDimitry Andric uint32_t unknown30; // 0x30 13406c3fb27SDimitry Andric uint32_t unknown34; // 0x34 13506c3fb27SDimitry Andric uint32_t UncompressedSize; // 0x38 13606c3fb27SDimitry Andric uint32_t unknown3c; // 0x3c 13706c3fb27SDimitry Andric uint32_t unknown40; // 0x40 13806c3fb27SDimitry Andric uint32_t unknown44; // 0x44 13906c3fb27SDimitry Andric FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags) 14006c3fb27SDimitry Andric : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)), 14106c3fb27SDimitry Andric DataSize(DataSize), unknown0c(0), CompressedSize(0), 14206c3fb27SDimitry Andric SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4), 14306c3fb27SDimitry Andric CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags), 14406c3fb27SDimitry Andric unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0), 14506c3fb27SDimitry Andric unknown3c(0), unknown40(0), unknown44(0) {} 14606c3fb27SDimitry Andric }; 14706c3fb27SDimitry Andric 14806c3fb27SDimitry Andric struct FatBinHeader { 14906c3fb27SDimitry Andric uint32_t Magic; // 0x00 15006c3fb27SDimitry Andric uint16_t Version; // 0x04 15106c3fb27SDimitry Andric uint16_t HeaderSize; // 0x06 15206c3fb27SDimitry Andric uint32_t DataSize; // 0x08 15306c3fb27SDimitry Andric uint32_t unknown0c; // 0x0c 15406c3fb27SDimitry Andric public: 15506c3fb27SDimitry Andric FatBinHeader(uint32_t DataSize) 15606c3fb27SDimitry Andric : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)), 15706c3fb27SDimitry Andric DataSize(DataSize), unknown0c(0) {} 15806c3fb27SDimitry Andric }; 15906c3fb27SDimitry Andric 16006c3fb27SDimitry Andric FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size()); 16106c3fb27SDimitry Andric FatbinContent.append((char *)&OuterHeader, 16206c3fb27SDimitry Andric ((char *)&OuterHeader) + OuterHeader.HeaderSize); 16306c3fb27SDimitry Andric 16406c3fb27SDimitry Andric FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion, 16506c3fb27SDimitry Andric FatBinFlags::AddressSize64 | 16606c3fb27SDimitry Andric FatBinFlags::HostLinux); 16706c3fb27SDimitry Andric FatbinContent.append((char *)&InnerHeader, 16806c3fb27SDimitry Andric ((char *)&InnerHeader) + InnerHeader.HeaderSize); 16906c3fb27SDimitry Andric 17006c3fb27SDimitry Andric FatbinContent.append(PTXCode.begin(), PTXCode.end()); 17106c3fb27SDimitry Andric 17206c3fb27SDimitry Andric return llvm::Error::success(); 17306c3fb27SDimitry Andric } 17406c3fb27SDimitry Andric 17506c3fb27SDimitry Andric IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {} 17606c3fb27SDimitry Andric 17706c3fb27SDimitry Andric } // namespace clang 178