xref: /freebsd-src/contrib/llvm-project/clang/lib/Interpreter/DeviceOffload.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements offloading to CUDA devices.
//
//===----------------------------------------------------------------------===//
12*06c3fb27SDimitry Andric 
13*06c3fb27SDimitry Andric #include "DeviceOffload.h"
14*06c3fb27SDimitry Andric 
15*06c3fb27SDimitry Andric #include "clang/Basic/TargetOptions.h"
16*06c3fb27SDimitry Andric #include "clang/CodeGen/ModuleBuilder.h"
17*06c3fb27SDimitry Andric #include "clang/Frontend/CompilerInstance.h"
18*06c3fb27SDimitry Andric 
19*06c3fb27SDimitry Andric #include "llvm/IR/LegacyPassManager.h"
20*06c3fb27SDimitry Andric #include "llvm/MC/TargetRegistry.h"
21*06c3fb27SDimitry Andric #include "llvm/Target/TargetMachine.h"
22*06c3fb27SDimitry Andric 
23*06c3fb27SDimitry Andric namespace clang {
24*06c3fb27SDimitry Andric 
25*06c3fb27SDimitry Andric IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
26*06c3fb27SDimitry Andric     Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
27*06c3fb27SDimitry Andric     IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
28*06c3fb27SDimitry Andric     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
29*06c3fb27SDimitry Andric     llvm::Error &Err)
30*06c3fb27SDimitry Andric     : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
31*06c3fb27SDimitry Andric       HostParser(HostParser), VFS(FS) {
32*06c3fb27SDimitry Andric   if (Err)
33*06c3fb27SDimitry Andric     return;
34*06c3fb27SDimitry Andric   StringRef Arch = CI->getTargetOpts().CPU;
35*06c3fb27SDimitry Andric   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
36*06c3fb27SDimitry Andric     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
37*06c3fb27SDimitry Andric                                                "Invalid CUDA architecture",
38*06c3fb27SDimitry Andric                                                llvm::inconvertibleErrorCode()));
39*06c3fb27SDimitry Andric     return;
40*06c3fb27SDimitry Andric   }
41*06c3fb27SDimitry Andric }
42*06c3fb27SDimitry Andric 
43*06c3fb27SDimitry Andric llvm::Expected<PartialTranslationUnit &>
44*06c3fb27SDimitry Andric IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
45*06c3fb27SDimitry Andric   auto PTU = IncrementalParser::Parse(Input);
46*06c3fb27SDimitry Andric   if (!PTU)
47*06c3fb27SDimitry Andric     return PTU.takeError();
48*06c3fb27SDimitry Andric 
49*06c3fb27SDimitry Andric   auto PTX = GeneratePTX();
50*06c3fb27SDimitry Andric   if (!PTX)
51*06c3fb27SDimitry Andric     return PTX.takeError();
52*06c3fb27SDimitry Andric 
53*06c3fb27SDimitry Andric   auto Err = GenerateFatbinary();
54*06c3fb27SDimitry Andric   if (Err)
55*06c3fb27SDimitry Andric     return std::move(Err);
56*06c3fb27SDimitry Andric 
57*06c3fb27SDimitry Andric   std::string FatbinFileName =
58*06c3fb27SDimitry Andric       "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
59*06c3fb27SDimitry Andric   VFS->addFile(FatbinFileName, 0,
60*06c3fb27SDimitry Andric                llvm::MemoryBuffer::getMemBuffer(
61*06c3fb27SDimitry Andric                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
62*06c3fb27SDimitry Andric                    "", false));
63*06c3fb27SDimitry Andric 
64*06c3fb27SDimitry Andric   HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;
65*06c3fb27SDimitry Andric 
66*06c3fb27SDimitry Andric   FatbinContent.clear();
67*06c3fb27SDimitry Andric 
68*06c3fb27SDimitry Andric   return PTU;
69*06c3fb27SDimitry Andric }
70*06c3fb27SDimitry Andric 
71*06c3fb27SDimitry Andric llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
72*06c3fb27SDimitry Andric   auto &PTU = PTUs.back();
73*06c3fb27SDimitry Andric   std::string Error;
74*06c3fb27SDimitry Andric 
75*06c3fb27SDimitry Andric   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
76*06c3fb27SDimitry Andric       PTU.TheModule->getTargetTriple(), Error);
77*06c3fb27SDimitry Andric   if (!Target)
78*06c3fb27SDimitry Andric     return llvm::make_error<llvm::StringError>(std::move(Error),
79*06c3fb27SDimitry Andric                                                std::error_code());
80*06c3fb27SDimitry Andric   llvm::TargetOptions TO = llvm::TargetOptions();
81*06c3fb27SDimitry Andric   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
82*06c3fb27SDimitry Andric       PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
83*06c3fb27SDimitry Andric       llvm::Reloc::Model::PIC_);
84*06c3fb27SDimitry Andric   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
85*06c3fb27SDimitry Andric 
86*06c3fb27SDimitry Andric   PTXCode.clear();
87*06c3fb27SDimitry Andric   llvm::raw_svector_ostream dest(PTXCode);
88*06c3fb27SDimitry Andric 
89*06c3fb27SDimitry Andric   llvm::legacy::PassManager PM;
90*06c3fb27SDimitry Andric   if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
91*06c3fb27SDimitry Andric                                          llvm::CGFT_AssemblyFile)) {
92*06c3fb27SDimitry Andric     return llvm::make_error<llvm::StringError>(
93*06c3fb27SDimitry Andric         "NVPTX backend cannot produce PTX code.",
94*06c3fb27SDimitry Andric         llvm::inconvertibleErrorCode());
95*06c3fb27SDimitry Andric   }
96*06c3fb27SDimitry Andric 
97*06c3fb27SDimitry Andric   if (!PM.run(*PTU.TheModule))
98*06c3fb27SDimitry Andric     return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
99*06c3fb27SDimitry Andric                                                llvm::inconvertibleErrorCode());
100*06c3fb27SDimitry Andric 
101*06c3fb27SDimitry Andric   PTXCode += '\0';
102*06c3fb27SDimitry Andric   while (PTXCode.size() % 8)
103*06c3fb27SDimitry Andric     PTXCode += '\0';
104*06c3fb27SDimitry Andric   return PTXCode.str();
105*06c3fb27SDimitry Andric }
106*06c3fb27SDimitry Andric 
107*06c3fb27SDimitry Andric llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
108*06c3fb27SDimitry Andric   enum FatBinFlags {
109*06c3fb27SDimitry Andric     AddressSize64 = 0x01,
110*06c3fb27SDimitry Andric     HasDebugInfo = 0x02,
111*06c3fb27SDimitry Andric     ProducerCuda = 0x04,
112*06c3fb27SDimitry Andric     HostLinux = 0x10,
113*06c3fb27SDimitry Andric     HostMac = 0x20,
114*06c3fb27SDimitry Andric     HostWindows = 0x40
115*06c3fb27SDimitry Andric   };
116*06c3fb27SDimitry Andric 
117*06c3fb27SDimitry Andric   struct FatBinInnerHeader {
118*06c3fb27SDimitry Andric     uint16_t Kind;             // 0x00
119*06c3fb27SDimitry Andric     uint16_t unknown02;        // 0x02
120*06c3fb27SDimitry Andric     uint32_t HeaderSize;       // 0x04
121*06c3fb27SDimitry Andric     uint32_t DataSize;         // 0x08
122*06c3fb27SDimitry Andric     uint32_t unknown0c;        // 0x0c
123*06c3fb27SDimitry Andric     uint32_t CompressedSize;   // 0x10
124*06c3fb27SDimitry Andric     uint32_t SubHeaderSize;    // 0x14
125*06c3fb27SDimitry Andric     uint16_t VersionMinor;     // 0x18
126*06c3fb27SDimitry Andric     uint16_t VersionMajor;     // 0x1a
127*06c3fb27SDimitry Andric     uint32_t CudaArch;         // 0x1c
128*06c3fb27SDimitry Andric     uint32_t unknown20;        // 0x20
129*06c3fb27SDimitry Andric     uint32_t unknown24;        // 0x24
130*06c3fb27SDimitry Andric     uint32_t Flags;            // 0x28
131*06c3fb27SDimitry Andric     uint32_t unknown2c;        // 0x2c
132*06c3fb27SDimitry Andric     uint32_t unknown30;        // 0x30
133*06c3fb27SDimitry Andric     uint32_t unknown34;        // 0x34
134*06c3fb27SDimitry Andric     uint32_t UncompressedSize; // 0x38
135*06c3fb27SDimitry Andric     uint32_t unknown3c;        // 0x3c
136*06c3fb27SDimitry Andric     uint32_t unknown40;        // 0x40
137*06c3fb27SDimitry Andric     uint32_t unknown44;        // 0x44
138*06c3fb27SDimitry Andric     FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
139*06c3fb27SDimitry Andric         : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
140*06c3fb27SDimitry Andric           DataSize(DataSize), unknown0c(0), CompressedSize(0),
141*06c3fb27SDimitry Andric           SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
142*06c3fb27SDimitry Andric           CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
143*06c3fb27SDimitry Andric           unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
144*06c3fb27SDimitry Andric           unknown3c(0), unknown40(0), unknown44(0) {}
145*06c3fb27SDimitry Andric   };
146*06c3fb27SDimitry Andric 
147*06c3fb27SDimitry Andric   struct FatBinHeader {
148*06c3fb27SDimitry Andric     uint32_t Magic;      // 0x00
149*06c3fb27SDimitry Andric     uint16_t Version;    // 0x04
150*06c3fb27SDimitry Andric     uint16_t HeaderSize; // 0x06
151*06c3fb27SDimitry Andric     uint32_t DataSize;   // 0x08
152*06c3fb27SDimitry Andric     uint32_t unknown0c;  // 0x0c
153*06c3fb27SDimitry Andric   public:
154*06c3fb27SDimitry Andric     FatBinHeader(uint32_t DataSize)
155*06c3fb27SDimitry Andric         : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
156*06c3fb27SDimitry Andric           DataSize(DataSize), unknown0c(0) {}
157*06c3fb27SDimitry Andric   };
158*06c3fb27SDimitry Andric 
159*06c3fb27SDimitry Andric   FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
160*06c3fb27SDimitry Andric   FatbinContent.append((char *)&OuterHeader,
161*06c3fb27SDimitry Andric                        ((char *)&OuterHeader) + OuterHeader.HeaderSize);
162*06c3fb27SDimitry Andric 
163*06c3fb27SDimitry Andric   FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
164*06c3fb27SDimitry Andric                                 FatBinFlags::AddressSize64 |
165*06c3fb27SDimitry Andric                                     FatBinFlags::HostLinux);
166*06c3fb27SDimitry Andric   FatbinContent.append((char *)&InnerHeader,
167*06c3fb27SDimitry Andric                        ((char *)&InnerHeader) + InnerHeader.HeaderSize);
168*06c3fb27SDimitry Andric 
169*06c3fb27SDimitry Andric   FatbinContent.append(PTXCode.begin(), PTXCode.end());
170*06c3fb27SDimitry Andric 
171*06c3fb27SDimitry Andric   return llvm::Error::success();
172*06c3fb27SDimitry Andric }
173*06c3fb27SDimitry Andric 
174*06c3fb27SDimitry Andric IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
175*06c3fb27SDimitry Andric 
176*06c3fb27SDimitry Andric } // namespace clang
177