xref: /llvm-project/clang/lib/Interpreter/DeviceOffload.cpp (revision a72d7eea5413444249670579fecea6823fb3c564)
1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements offloading to CUDA devices.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "DeviceOffload.h"
14 
15 #include "clang/Basic/TargetOptions.h"
16 #include "clang/CodeGen/ModuleBuilder.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Interpreter/PartialTranslationUnit.h"
19 
20 #include "llvm/IR/LegacyPassManager.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/MC/TargetRegistry.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 namespace clang {
26 
27 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
28     std::unique_ptr<CompilerInstance> DeviceInstance,
29     CompilerInstance &HostInstance,
30     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
31     llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
32     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
33       CodeGenOpts(HostInstance.getCodeGenOpts()),
34       TargetOpts(HostInstance.getTargetOpts()) {
35   if (Err)
36     return;
37   DeviceCI = std::move(DeviceInstance);
38   StringRef Arch = TargetOpts.CPU;
39   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
40     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
41                                                "Invalid CUDA architecture",
42                                                llvm::inconvertibleErrorCode()));
43     return;
44   }
45 }
46 
47 llvm::Expected<TranslationUnitDecl *>
48 IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
49   auto PTU = IncrementalParser::Parse(Input);
50   if (!PTU)
51     return PTU.takeError();
52 
53   auto PTX = GeneratePTX();
54   if (!PTX)
55     return PTX.takeError();
56 
57   auto Err = GenerateFatbinary();
58   if (Err)
59     return std::move(Err);
60 
61   std::string FatbinFileName =
62       "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
63   VFS->addFile(FatbinFileName, 0,
64                llvm::MemoryBuffer::getMemBuffer(
65                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
66                    "", false));
67 
68   CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
69 
70   FatbinContent.clear();
71 
72   return PTU;
73 }
74 
75 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
76   auto &PTU = PTUs.back();
77   std::string Error;
78 
79   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
80       PTU.TheModule->getTargetTriple(), Error);
81   if (!Target)
82     return llvm::make_error<llvm::StringError>(std::move(Error),
83                                                std::error_code());
84   llvm::TargetOptions TO = llvm::TargetOptions();
85   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
86       PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
87       llvm::Reloc::Model::PIC_);
88   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
89 
90   PTXCode.clear();
91   llvm::raw_svector_ostream dest(PTXCode);
92 
93   llvm::legacy::PassManager PM;
94   if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
95                                          llvm::CodeGenFileType::AssemblyFile)) {
96     return llvm::make_error<llvm::StringError>(
97         "NVPTX backend cannot produce PTX code.",
98         llvm::inconvertibleErrorCode());
99   }
100 
101   if (!PM.run(*PTU.TheModule))
102     return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
103                                                llvm::inconvertibleErrorCode());
104 
105   PTXCode += '\0';
106   while (PTXCode.size() % 8)
107     PTXCode += '\0';
108   return PTXCode.str();
109 }
110 
111 llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
112   enum FatBinFlags {
113     AddressSize64 = 0x01,
114     HasDebugInfo = 0x02,
115     ProducerCuda = 0x04,
116     HostLinux = 0x10,
117     HostMac = 0x20,
118     HostWindows = 0x40
119   };
120 
121   struct FatBinInnerHeader {
122     uint16_t Kind;             // 0x00
123     uint16_t unknown02;        // 0x02
124     uint32_t HeaderSize;       // 0x04
125     uint32_t DataSize;         // 0x08
126     uint32_t unknown0c;        // 0x0c
127     uint32_t CompressedSize;   // 0x10
128     uint32_t SubHeaderSize;    // 0x14
129     uint16_t VersionMinor;     // 0x18
130     uint16_t VersionMajor;     // 0x1a
131     uint32_t CudaArch;         // 0x1c
132     uint32_t unknown20;        // 0x20
133     uint32_t unknown24;        // 0x24
134     uint32_t Flags;            // 0x28
135     uint32_t unknown2c;        // 0x2c
136     uint32_t unknown30;        // 0x30
137     uint32_t unknown34;        // 0x34
138     uint32_t UncompressedSize; // 0x38
139     uint32_t unknown3c;        // 0x3c
140     uint32_t unknown40;        // 0x40
141     uint32_t unknown44;        // 0x44
142     FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
143         : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
144           DataSize(DataSize), unknown0c(0), CompressedSize(0),
145           SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
146           CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
147           unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
148           unknown3c(0), unknown40(0), unknown44(0) {}
149   };
150 
151   struct FatBinHeader {
152     uint32_t Magic;      // 0x00
153     uint16_t Version;    // 0x04
154     uint16_t HeaderSize; // 0x06
155     uint32_t DataSize;   // 0x08
156     uint32_t unknown0c;  // 0x0c
157   public:
158     FatBinHeader(uint32_t DataSize)
159         : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
160           DataSize(DataSize), unknown0c(0) {}
161   };
162 
163   FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
164   FatbinContent.append((char *)&OuterHeader,
165                        ((char *)&OuterHeader) + OuterHeader.HeaderSize);
166 
167   FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
168                                 FatBinFlags::AddressSize64 |
169                                     FatBinFlags::HostLinux);
170   FatbinContent.append((char *)&InnerHeader,
171                        ((char *)&InnerHeader) + InnerHeader.HeaderSize);
172 
173   FatbinContent.append(PTXCode.begin(), PTXCode.end());
174 
175   return llvm::Error::success();
176 }
177 
178 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
179 
180 } // namespace clang
181