xref: /freebsd-src/contrib/llvm-project/clang/lib/Interpreter/DeviceOffload.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric //
906c3fb27SDimitry Andric // This file implements offloading to CUDA devices.
1006c3fb27SDimitry Andric //
1106c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1206c3fb27SDimitry Andric 
1306c3fb27SDimitry Andric #include "DeviceOffload.h"
1406c3fb27SDimitry Andric 
1506c3fb27SDimitry Andric #include "clang/Basic/TargetOptions.h"
1606c3fb27SDimitry Andric #include "clang/CodeGen/ModuleBuilder.h"
1706c3fb27SDimitry Andric #include "clang/Frontend/CompilerInstance.h"
1806c3fb27SDimitry Andric 
1906c3fb27SDimitry Andric #include "llvm/IR/LegacyPassManager.h"
20*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h"
2106c3fb27SDimitry Andric #include "llvm/MC/TargetRegistry.h"
2206c3fb27SDimitry Andric #include "llvm/Target/TargetMachine.h"
2306c3fb27SDimitry Andric 
2406c3fb27SDimitry Andric namespace clang {
2506c3fb27SDimitry Andric 
2606c3fb27SDimitry Andric IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
2706c3fb27SDimitry Andric     Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
2806c3fb27SDimitry Andric     IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
2906c3fb27SDimitry Andric     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
3006c3fb27SDimitry Andric     llvm::Error &Err)
3106c3fb27SDimitry Andric     : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
3206c3fb27SDimitry Andric       HostParser(HostParser), VFS(FS) {
3306c3fb27SDimitry Andric   if (Err)
3406c3fb27SDimitry Andric     return;
3506c3fb27SDimitry Andric   StringRef Arch = CI->getTargetOpts().CPU;
3606c3fb27SDimitry Andric   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
3706c3fb27SDimitry Andric     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
3806c3fb27SDimitry Andric                                                "Invalid CUDA architecture",
3906c3fb27SDimitry Andric                                                llvm::inconvertibleErrorCode()));
4006c3fb27SDimitry Andric     return;
4106c3fb27SDimitry Andric   }
4206c3fb27SDimitry Andric }
4306c3fb27SDimitry Andric 
4406c3fb27SDimitry Andric llvm::Expected<PartialTranslationUnit &>
4506c3fb27SDimitry Andric IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
4606c3fb27SDimitry Andric   auto PTU = IncrementalParser::Parse(Input);
4706c3fb27SDimitry Andric   if (!PTU)
4806c3fb27SDimitry Andric     return PTU.takeError();
4906c3fb27SDimitry Andric 
5006c3fb27SDimitry Andric   auto PTX = GeneratePTX();
5106c3fb27SDimitry Andric   if (!PTX)
5206c3fb27SDimitry Andric     return PTX.takeError();
5306c3fb27SDimitry Andric 
5406c3fb27SDimitry Andric   auto Err = GenerateFatbinary();
5506c3fb27SDimitry Andric   if (Err)
5606c3fb27SDimitry Andric     return std::move(Err);
5706c3fb27SDimitry Andric 
5806c3fb27SDimitry Andric   std::string FatbinFileName =
5906c3fb27SDimitry Andric       "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
6006c3fb27SDimitry Andric   VFS->addFile(FatbinFileName, 0,
6106c3fb27SDimitry Andric                llvm::MemoryBuffer::getMemBuffer(
6206c3fb27SDimitry Andric                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
6306c3fb27SDimitry Andric                    "", false));
6406c3fb27SDimitry Andric 
6506c3fb27SDimitry Andric   HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;
6606c3fb27SDimitry Andric 
6706c3fb27SDimitry Andric   FatbinContent.clear();
6806c3fb27SDimitry Andric 
6906c3fb27SDimitry Andric   return PTU;
7006c3fb27SDimitry Andric }
7106c3fb27SDimitry Andric 
7206c3fb27SDimitry Andric llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
7306c3fb27SDimitry Andric   auto &PTU = PTUs.back();
7406c3fb27SDimitry Andric   std::string Error;
7506c3fb27SDimitry Andric 
7606c3fb27SDimitry Andric   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
7706c3fb27SDimitry Andric       PTU.TheModule->getTargetTriple(), Error);
7806c3fb27SDimitry Andric   if (!Target)
7906c3fb27SDimitry Andric     return llvm::make_error<llvm::StringError>(std::move(Error),
8006c3fb27SDimitry Andric                                                std::error_code());
8106c3fb27SDimitry Andric   llvm::TargetOptions TO = llvm::TargetOptions();
8206c3fb27SDimitry Andric   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
8306c3fb27SDimitry Andric       PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
8406c3fb27SDimitry Andric       llvm::Reloc::Model::PIC_);
8506c3fb27SDimitry Andric   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
8606c3fb27SDimitry Andric 
8706c3fb27SDimitry Andric   PTXCode.clear();
8806c3fb27SDimitry Andric   llvm::raw_svector_ostream dest(PTXCode);
8906c3fb27SDimitry Andric 
9006c3fb27SDimitry Andric   llvm::legacy::PassManager PM;
9106c3fb27SDimitry Andric   if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
925f757f3fSDimitry Andric                                          llvm::CodeGenFileType::AssemblyFile)) {
9306c3fb27SDimitry Andric     return llvm::make_error<llvm::StringError>(
9406c3fb27SDimitry Andric         "NVPTX backend cannot produce PTX code.",
9506c3fb27SDimitry Andric         llvm::inconvertibleErrorCode());
9606c3fb27SDimitry Andric   }
9706c3fb27SDimitry Andric 
9806c3fb27SDimitry Andric   if (!PM.run(*PTU.TheModule))
9906c3fb27SDimitry Andric     return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
10006c3fb27SDimitry Andric                                                llvm::inconvertibleErrorCode());
10106c3fb27SDimitry Andric 
10206c3fb27SDimitry Andric   PTXCode += '\0';
10306c3fb27SDimitry Andric   while (PTXCode.size() % 8)
10406c3fb27SDimitry Andric     PTXCode += '\0';
10506c3fb27SDimitry Andric   return PTXCode.str();
10606c3fb27SDimitry Andric }
10706c3fb27SDimitry Andric 
10806c3fb27SDimitry Andric llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
10906c3fb27SDimitry Andric   enum FatBinFlags {
11006c3fb27SDimitry Andric     AddressSize64 = 0x01,
11106c3fb27SDimitry Andric     HasDebugInfo = 0x02,
11206c3fb27SDimitry Andric     ProducerCuda = 0x04,
11306c3fb27SDimitry Andric     HostLinux = 0x10,
11406c3fb27SDimitry Andric     HostMac = 0x20,
11506c3fb27SDimitry Andric     HostWindows = 0x40
11606c3fb27SDimitry Andric   };
11706c3fb27SDimitry Andric 
11806c3fb27SDimitry Andric   struct FatBinInnerHeader {
11906c3fb27SDimitry Andric     uint16_t Kind;             // 0x00
12006c3fb27SDimitry Andric     uint16_t unknown02;        // 0x02
12106c3fb27SDimitry Andric     uint32_t HeaderSize;       // 0x04
12206c3fb27SDimitry Andric     uint32_t DataSize;         // 0x08
12306c3fb27SDimitry Andric     uint32_t unknown0c;        // 0x0c
12406c3fb27SDimitry Andric     uint32_t CompressedSize;   // 0x10
12506c3fb27SDimitry Andric     uint32_t SubHeaderSize;    // 0x14
12606c3fb27SDimitry Andric     uint16_t VersionMinor;     // 0x18
12706c3fb27SDimitry Andric     uint16_t VersionMajor;     // 0x1a
12806c3fb27SDimitry Andric     uint32_t CudaArch;         // 0x1c
12906c3fb27SDimitry Andric     uint32_t unknown20;        // 0x20
13006c3fb27SDimitry Andric     uint32_t unknown24;        // 0x24
13106c3fb27SDimitry Andric     uint32_t Flags;            // 0x28
13206c3fb27SDimitry Andric     uint32_t unknown2c;        // 0x2c
13306c3fb27SDimitry Andric     uint32_t unknown30;        // 0x30
13406c3fb27SDimitry Andric     uint32_t unknown34;        // 0x34
13506c3fb27SDimitry Andric     uint32_t UncompressedSize; // 0x38
13606c3fb27SDimitry Andric     uint32_t unknown3c;        // 0x3c
13706c3fb27SDimitry Andric     uint32_t unknown40;        // 0x40
13806c3fb27SDimitry Andric     uint32_t unknown44;        // 0x44
13906c3fb27SDimitry Andric     FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
14006c3fb27SDimitry Andric         : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
14106c3fb27SDimitry Andric           DataSize(DataSize), unknown0c(0), CompressedSize(0),
14206c3fb27SDimitry Andric           SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
14306c3fb27SDimitry Andric           CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
14406c3fb27SDimitry Andric           unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
14506c3fb27SDimitry Andric           unknown3c(0), unknown40(0), unknown44(0) {}
14606c3fb27SDimitry Andric   };
14706c3fb27SDimitry Andric 
14806c3fb27SDimitry Andric   struct FatBinHeader {
14906c3fb27SDimitry Andric     uint32_t Magic;      // 0x00
15006c3fb27SDimitry Andric     uint16_t Version;    // 0x04
15106c3fb27SDimitry Andric     uint16_t HeaderSize; // 0x06
15206c3fb27SDimitry Andric     uint32_t DataSize;   // 0x08
15306c3fb27SDimitry Andric     uint32_t unknown0c;  // 0x0c
15406c3fb27SDimitry Andric   public:
15506c3fb27SDimitry Andric     FatBinHeader(uint32_t DataSize)
15606c3fb27SDimitry Andric         : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
15706c3fb27SDimitry Andric           DataSize(DataSize), unknown0c(0) {}
15806c3fb27SDimitry Andric   };
15906c3fb27SDimitry Andric 
16006c3fb27SDimitry Andric   FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
16106c3fb27SDimitry Andric   FatbinContent.append((char *)&OuterHeader,
16206c3fb27SDimitry Andric                        ((char *)&OuterHeader) + OuterHeader.HeaderSize);
16306c3fb27SDimitry Andric 
16406c3fb27SDimitry Andric   FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
16506c3fb27SDimitry Andric                                 FatBinFlags::AddressSize64 |
16606c3fb27SDimitry Andric                                     FatBinFlags::HostLinux);
16706c3fb27SDimitry Andric   FatbinContent.append((char *)&InnerHeader,
16806c3fb27SDimitry Andric                        ((char *)&InnerHeader) + InnerHeader.HeaderSize);
16906c3fb27SDimitry Andric 
17006c3fb27SDimitry Andric   FatbinContent.append(PTXCode.begin(), PTXCode.end());
17106c3fb27SDimitry Andric 
17206c3fb27SDimitry Andric   return llvm::Error::success();
17306c3fb27SDimitry Andric }
17406c3fb27SDimitry Andric 
17506c3fb27SDimitry Andric IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
17606c3fb27SDimitry Andric 
17706c3fb27SDimitry Andric } // namespace clang
178