xref: /llvm-project/clang/lib/Interpreter/DeviceOffload.cpp (revision a72d7eea5413444249670579fecea6823fb3c564)
1ddeab07cSAnubhab Ghosh //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2ddeab07cSAnubhab Ghosh //
3ddeab07cSAnubhab Ghosh // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ddeab07cSAnubhab Ghosh // See https://llvm.org/LICENSE.txt for license information.
5ddeab07cSAnubhab Ghosh // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ddeab07cSAnubhab Ghosh //
7ddeab07cSAnubhab Ghosh //===----------------------------------------------------------------------===//
8ddeab07cSAnubhab Ghosh //
9ddeab07cSAnubhab Ghosh // This file implements offloading to CUDA devices.
10ddeab07cSAnubhab Ghosh //
11ddeab07cSAnubhab Ghosh //===----------------------------------------------------------------------===//
12ddeab07cSAnubhab Ghosh 
13ddeab07cSAnubhab Ghosh #include "DeviceOffload.h"
14ddeab07cSAnubhab Ghosh 
15ddeab07cSAnubhab Ghosh #include "clang/Basic/TargetOptions.h"
16ddeab07cSAnubhab Ghosh #include "clang/CodeGen/ModuleBuilder.h"
17ddeab07cSAnubhab Ghosh #include "clang/Frontend/CompilerInstance.h"
18*a72d7eeaSVassil Vassilev #include "clang/Interpreter/PartialTranslationUnit.h"
19ddeab07cSAnubhab Ghosh 
20ddeab07cSAnubhab Ghosh #include "llvm/IR/LegacyPassManager.h"
214169338eSNikita Popov #include "llvm/IR/Module.h"
22ddeab07cSAnubhab Ghosh #include "llvm/MC/TargetRegistry.h"
23ddeab07cSAnubhab Ghosh #include "llvm/Target/TargetMachine.h"
24ddeab07cSAnubhab Ghosh 
25ddeab07cSAnubhab Ghosh namespace clang {
26ddeab07cSAnubhab Ghosh 
27ddeab07cSAnubhab Ghosh IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
28*a72d7eeaSVassil Vassilev     std::unique_ptr<CompilerInstance> DeviceInstance,
29*a72d7eeaSVassil Vassilev     CompilerInstance &HostInstance,
30ddeab07cSAnubhab Ghosh     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
31*a72d7eeaSVassil Vassilev     llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
32*a72d7eeaSVassil Vassilev     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
33*a72d7eeaSVassil Vassilev       CodeGenOpts(HostInstance.getCodeGenOpts()),
34*a72d7eeaSVassil Vassilev       TargetOpts(HostInstance.getTargetOpts()) {
35ddeab07cSAnubhab Ghosh   if (Err)
36ddeab07cSAnubhab Ghosh     return;
37*a72d7eeaSVassil Vassilev   DeviceCI = std::move(DeviceInstance);
38*a72d7eeaSVassil Vassilev   StringRef Arch = TargetOpts.CPU;
39ddeab07cSAnubhab Ghosh   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
40ddeab07cSAnubhab Ghosh     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
41ddeab07cSAnubhab Ghosh                                                "Invalid CUDA architecture",
42ddeab07cSAnubhab Ghosh                                                llvm::inconvertibleErrorCode()));
43ddeab07cSAnubhab Ghosh     return;
44ddeab07cSAnubhab Ghosh   }
45ddeab07cSAnubhab Ghosh }
46ddeab07cSAnubhab Ghosh 
47*a72d7eeaSVassil Vassilev llvm::Expected<TranslationUnitDecl *>
48ddeab07cSAnubhab Ghosh IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
49ddeab07cSAnubhab Ghosh   auto PTU = IncrementalParser::Parse(Input);
50ddeab07cSAnubhab Ghosh   if (!PTU)
51ddeab07cSAnubhab Ghosh     return PTU.takeError();
52ddeab07cSAnubhab Ghosh 
53ddeab07cSAnubhab Ghosh   auto PTX = GeneratePTX();
54ddeab07cSAnubhab Ghosh   if (!PTX)
55ddeab07cSAnubhab Ghosh     return PTX.takeError();
56ddeab07cSAnubhab Ghosh 
57ddeab07cSAnubhab Ghosh   auto Err = GenerateFatbinary();
58ddeab07cSAnubhab Ghosh   if (Err)
59ddeab07cSAnubhab Ghosh     return std::move(Err);
60ddeab07cSAnubhab Ghosh 
61ddeab07cSAnubhab Ghosh   std::string FatbinFileName =
62ddeab07cSAnubhab Ghosh       "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
63ddeab07cSAnubhab Ghosh   VFS->addFile(FatbinFileName, 0,
64ddeab07cSAnubhab Ghosh                llvm::MemoryBuffer::getMemBuffer(
65ddeab07cSAnubhab Ghosh                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
66ddeab07cSAnubhab Ghosh                    "", false));
67ddeab07cSAnubhab Ghosh 
68*a72d7eeaSVassil Vassilev   CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
69ddeab07cSAnubhab Ghosh 
70ddeab07cSAnubhab Ghosh   FatbinContent.clear();
71ddeab07cSAnubhab Ghosh 
72ddeab07cSAnubhab Ghosh   return PTU;
73ddeab07cSAnubhab Ghosh }
74ddeab07cSAnubhab Ghosh 
75ddeab07cSAnubhab Ghosh llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
76ddeab07cSAnubhab Ghosh   auto &PTU = PTUs.back();
77ddeab07cSAnubhab Ghosh   std::string Error;
78ddeab07cSAnubhab Ghosh 
79ddeab07cSAnubhab Ghosh   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
80ddeab07cSAnubhab Ghosh       PTU.TheModule->getTargetTriple(), Error);
81ddeab07cSAnubhab Ghosh   if (!Target)
82ddeab07cSAnubhab Ghosh     return llvm::make_error<llvm::StringError>(std::move(Error),
83ddeab07cSAnubhab Ghosh                                                std::error_code());
84ddeab07cSAnubhab Ghosh   llvm::TargetOptions TO = llvm::TargetOptions();
85ddeab07cSAnubhab Ghosh   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
86*a72d7eeaSVassil Vassilev       PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
87ddeab07cSAnubhab Ghosh       llvm::Reloc::Model::PIC_);
88ddeab07cSAnubhab Ghosh   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
89ddeab07cSAnubhab Ghosh 
90ddeab07cSAnubhab Ghosh   PTXCode.clear();
91ddeab07cSAnubhab Ghosh   llvm::raw_svector_ostream dest(PTXCode);
92ddeab07cSAnubhab Ghosh 
93ddeab07cSAnubhab Ghosh   llvm::legacy::PassManager PM;
94ddeab07cSAnubhab Ghosh   if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
950a1aa6cdSArthur Eubanks                                          llvm::CodeGenFileType::AssemblyFile)) {
96ddeab07cSAnubhab Ghosh     return llvm::make_error<llvm::StringError>(
97ddeab07cSAnubhab Ghosh         "NVPTX backend cannot produce PTX code.",
98ddeab07cSAnubhab Ghosh         llvm::inconvertibleErrorCode());
99ddeab07cSAnubhab Ghosh   }
100ddeab07cSAnubhab Ghosh 
101ddeab07cSAnubhab Ghosh   if (!PM.run(*PTU.TheModule))
102ddeab07cSAnubhab Ghosh     return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
103ddeab07cSAnubhab Ghosh                                                llvm::inconvertibleErrorCode());
104ddeab07cSAnubhab Ghosh 
105ddeab07cSAnubhab Ghosh   PTXCode += '\0';
106ddeab07cSAnubhab Ghosh   while (PTXCode.size() % 8)
107ddeab07cSAnubhab Ghosh     PTXCode += '\0';
108ddeab07cSAnubhab Ghosh   return PTXCode.str();
109ddeab07cSAnubhab Ghosh }
110ddeab07cSAnubhab Ghosh 
111ddeab07cSAnubhab Ghosh llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
112ddeab07cSAnubhab Ghosh   enum FatBinFlags {
113ddeab07cSAnubhab Ghosh     AddressSize64 = 0x01,
114ddeab07cSAnubhab Ghosh     HasDebugInfo = 0x02,
115ddeab07cSAnubhab Ghosh     ProducerCuda = 0x04,
116ddeab07cSAnubhab Ghosh     HostLinux = 0x10,
117ddeab07cSAnubhab Ghosh     HostMac = 0x20,
118ddeab07cSAnubhab Ghosh     HostWindows = 0x40
119ddeab07cSAnubhab Ghosh   };
120ddeab07cSAnubhab Ghosh 
121ddeab07cSAnubhab Ghosh   struct FatBinInnerHeader {
122ddeab07cSAnubhab Ghosh     uint16_t Kind;             // 0x00
123ddeab07cSAnubhab Ghosh     uint16_t unknown02;        // 0x02
124ddeab07cSAnubhab Ghosh     uint32_t HeaderSize;       // 0x04
125ddeab07cSAnubhab Ghosh     uint32_t DataSize;         // 0x08
126ddeab07cSAnubhab Ghosh     uint32_t unknown0c;        // 0x0c
127ddeab07cSAnubhab Ghosh     uint32_t CompressedSize;   // 0x10
128ddeab07cSAnubhab Ghosh     uint32_t SubHeaderSize;    // 0x14
129ddeab07cSAnubhab Ghosh     uint16_t VersionMinor;     // 0x18
130ddeab07cSAnubhab Ghosh     uint16_t VersionMajor;     // 0x1a
131ddeab07cSAnubhab Ghosh     uint32_t CudaArch;         // 0x1c
132ddeab07cSAnubhab Ghosh     uint32_t unknown20;        // 0x20
133ddeab07cSAnubhab Ghosh     uint32_t unknown24;        // 0x24
134ddeab07cSAnubhab Ghosh     uint32_t Flags;            // 0x28
135ddeab07cSAnubhab Ghosh     uint32_t unknown2c;        // 0x2c
136ddeab07cSAnubhab Ghosh     uint32_t unknown30;        // 0x30
137ddeab07cSAnubhab Ghosh     uint32_t unknown34;        // 0x34
138ddeab07cSAnubhab Ghosh     uint32_t UncompressedSize; // 0x38
139ddeab07cSAnubhab Ghosh     uint32_t unknown3c;        // 0x3c
140ddeab07cSAnubhab Ghosh     uint32_t unknown40;        // 0x40
141ddeab07cSAnubhab Ghosh     uint32_t unknown44;        // 0x44
142ddeab07cSAnubhab Ghosh     FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
143ddeab07cSAnubhab Ghosh         : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
144ddeab07cSAnubhab Ghosh           DataSize(DataSize), unknown0c(0), CompressedSize(0),
145ddeab07cSAnubhab Ghosh           SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
146ddeab07cSAnubhab Ghosh           CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
147ddeab07cSAnubhab Ghosh           unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
148ddeab07cSAnubhab Ghosh           unknown3c(0), unknown40(0), unknown44(0) {}
149ddeab07cSAnubhab Ghosh   };
150ddeab07cSAnubhab Ghosh 
151ddeab07cSAnubhab Ghosh   struct FatBinHeader {
152ddeab07cSAnubhab Ghosh     uint32_t Magic;      // 0x00
153ddeab07cSAnubhab Ghosh     uint16_t Version;    // 0x04
154ddeab07cSAnubhab Ghosh     uint16_t HeaderSize; // 0x06
155ddeab07cSAnubhab Ghosh     uint32_t DataSize;   // 0x08
156ddeab07cSAnubhab Ghosh     uint32_t unknown0c;  // 0x0c
157ddeab07cSAnubhab Ghosh   public:
158ddeab07cSAnubhab Ghosh     FatBinHeader(uint32_t DataSize)
159ddeab07cSAnubhab Ghosh         : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
160ddeab07cSAnubhab Ghosh           DataSize(DataSize), unknown0c(0) {}
161ddeab07cSAnubhab Ghosh   };
162ddeab07cSAnubhab Ghosh 
163ddeab07cSAnubhab Ghosh   FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
164ddeab07cSAnubhab Ghosh   FatbinContent.append((char *)&OuterHeader,
165ddeab07cSAnubhab Ghosh                        ((char *)&OuterHeader) + OuterHeader.HeaderSize);
166ddeab07cSAnubhab Ghosh 
167ddeab07cSAnubhab Ghosh   FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
168ddeab07cSAnubhab Ghosh                                 FatBinFlags::AddressSize64 |
169ddeab07cSAnubhab Ghosh                                     FatBinFlags::HostLinux);
170ddeab07cSAnubhab Ghosh   FatbinContent.append((char *)&InnerHeader,
171ddeab07cSAnubhab Ghosh                        ((char *)&InnerHeader) + InnerHeader.HeaderSize);
172ddeab07cSAnubhab Ghosh 
173ddeab07cSAnubhab Ghosh   FatbinContent.append(PTXCode.begin(), PTXCode.end());
174ddeab07cSAnubhab Ghosh 
175ddeab07cSAnubhab Ghosh   return llvm::Error::success();
176ddeab07cSAnubhab Ghosh }
177ddeab07cSAnubhab Ghosh 
178ddeab07cSAnubhab Ghosh IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
179ddeab07cSAnubhab Ghosh 
180ddeab07cSAnubhab Ghosh } // namespace clang
181