xref: /llvm-project/mlir/lib/Target/LLVM/ROCDL/Target.cpp (revision 72e8b9aeaa3f584f223bc59924812df69a09a48b)
1 //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ROCDL target related functions including registration
10 // calls for the `#rocdl.target` compilation attribute.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Target/LLVM/ROCDL/Target.h"
15 
16 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
17 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18 #include "mlir/Support/FileUtilities.h"
19 #include "mlir/Target/LLVM/ROCDL/Utils.h"
20 #include "mlir/Target/LLVMIR/Export.h"
21 
22 #include "llvm/IR/Constants.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCCodeEmitter.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/FileUtilities.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Program.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetSelect.h"
41 #include "llvm/TargetParser/TargetParser.h"
42 
43 #include <cstdlib>
44 #include <optional>
45 
46 using namespace mlir;
47 using namespace mlir::ROCDL;
48 
49 #ifndef __DEFAULT_ROCM_PATH__
50 #define __DEFAULT_ROCM_PATH__ ""
51 #endif
52 
53 namespace {
54 // Implementation of the `TargetAttrInterface` model.
55 class ROCDLTargetAttrImpl
56     : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
57 public:
58   std::optional<SmallVector<char, 0>>
59   serializeToObject(Attribute attribute, Operation *module,
60                     const gpu::TargetOptions &options) const;
61 
62   Attribute createObject(Attribute attribute, Operation *module,
63                          const SmallVector<char, 0> &object,
64                          const gpu::TargetOptions &options) const;
65 };
66 } // namespace
67 
68 // Register the ROCDL dialect, the ROCDL translation and the target interface.
69 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
70     DialectRegistry &registry) {
71   registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
72     ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
73   });
74 }
75 
76 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
77     MLIRContext &context) {
78   DialectRegistry registry;
79   registerROCDLTargetInterfaceExternalModels(registry);
80   context.appendDialectRegistry(registry);
81 }
82 
83 // Search for the ROCM path.
84 StringRef mlir::ROCDL::getROCMPath() {
85   if (const char *var = std::getenv("ROCM_PATH"))
86     return var;
87   if (const char *var = std::getenv("ROCM_ROOT"))
88     return var;
89   if (const char *var = std::getenv("ROCM_HOME"))
90     return var;
91   return __DEFAULT_ROCM_PATH__;
92 }
93 
94 SerializeGPUModuleBase::SerializeGPUModuleBase(
95     Operation &module, ROCDLTargetAttr target,
96     const gpu::TargetOptions &targetOptions)
97     : ModuleToObject(module, target.getTriple(), target.getChip(),
98                      target.getFeatures(), target.getO()),
99       target(target), toolkitPath(targetOptions.getToolkitPath()),
100       librariesToLink(targetOptions.getLibrariesToLink()) {
101 
102   // If `targetOptions` has an empty toolkitPath use `getROCMPath`
103   if (toolkitPath.empty())
104     toolkitPath = getROCMPath();
105 
106   // Append the files in the target attribute.
107   if (target.getLink())
108     librariesToLink.append(target.getLink().begin(), target.getLink().end());
109 }
110 
111 void SerializeGPUModuleBase::init() {
112   static llvm::once_flag initializeBackendOnce;
113   llvm::call_once(initializeBackendOnce, []() {
114   // If the `AMDGPU` LLVM target was built, initialize it.
115 #if MLIR_ENABLE_ROCM_CONVERSIONS
116     LLVMInitializeAMDGPUTarget();
117     LLVMInitializeAMDGPUTargetInfo();
118     LLVMInitializeAMDGPUTargetMC();
119     LLVMInitializeAMDGPUAsmParser();
120     LLVMInitializeAMDGPUAsmPrinter();
121 #endif
122   });
123 }
124 
125 ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
126 
127 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
128 
129 ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
130   return librariesToLink;
131 }
132 
133 LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) {
134   if (libs == AMDGCNLibraries::None)
135     return success();
136   StringRef pathRef = getToolkitPath();
137 
138   // Get the path for the device libraries
139   SmallString<256> path;
140   path.insert(path.begin(), pathRef.begin(), pathRef.end());
141   llvm::sys::path::append(path, "amdgcn", "bitcode");
142   pathRef = StringRef(path.data(), path.size());
143 
144   // Fail if the path is invalid.
145   if (!llvm::sys::fs::is_directory(pathRef)) {
146     getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef
147                                << " does not exist or is not a directory";
148     return failure();
149   }
150 
151   // Helper function for adding a library.
152   auto addLib = [&](const Twine &lib) -> bool {
153     auto baseSize = path.size();
154     llvm::sys::path::append(path, lib);
155     StringRef pathRef(path.data(), path.size());
156     if (!llvm::sys::fs::is_regular_file(pathRef)) {
157       getOperation().emitRemark() << "bitcode library path: " << pathRef
158                                   << " does not exist or is not a file";
159       return true;
160     }
161     librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
162     path.truncate(baseSize);
163     return false;
164   };
165 
166   // Add ROCm device libraries. Fail if any of the libraries is not found, ie.
167   // if any of the `addLib` failed.
168   if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) ||
169       (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) ||
170       (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) ||
171       (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")))
172     return failure();
173   return success();
174 }
175 
176 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
177 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
178   // Return if there are no libs to load.
179   if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty())
180     return SmallVector<std::unique_ptr<llvm::Module>>();
181   if (failed(appendStandardLibs(deviceLibs)))
182     return std::nullopt;
183   SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
184   if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
185                                       bcFiles, true)))
186     return std::nullopt;
187   return std::move(bcFiles);
188 }
189 
190 LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
191   // Some ROCM builds don't strip this like they should
192   if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version"))
193     module.eraseNamedMetadata(openclVersion);
194   // Stop spamming us with clang version numbers
195   if (auto *ident = module.getNamedMetadata("llvm.ident"))
196     module.eraseNamedMetadata(ident);
197   // Override the libModules datalayout and target triple with the compiler's
198   // data layout should there be a discrepency.
199   setDataLayoutAndTriple(module);
200   return success();
201 }
202 
203 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
204   // If all libraries are not set, traverse the module to determine which
205   // libraries are required.
206   if (deviceLibs != AMDGCNLibraries::All) {
207     for (llvm::Function &f : module.functions()) {
208       if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
209         StringRef funcName = f.getName();
210         if ("printf" == funcName)
211           deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl |
212                         AMDGCNLibraries::Ocml;
213         if (funcName.starts_with("__ockl_"))
214           deviceLibs |= AMDGCNLibraries::Ockl;
215         if (funcName.starts_with("__ocml_"))
216           deviceLibs |= AMDGCNLibraries::Ocml;
217         if (funcName == "__atomic_work_item_fence")
218           deviceLibs |= AMDGCNLibraries::Hip;
219       }
220     }
221   }
222   addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
223                       target.hasFiniteOnly(), target.hasUnsafeMath(),
224                       target.hasFastMath(), target.hasCorrectSqrt(),
225                       target.getAbi());
226 }
227 
228 void SerializeGPUModuleBase::addControlVariables(
229     llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz,
230     bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
231     StringRef abiVer) {
232   // Helper function for adding control variables.
233   auto addControlVariable = [&module](StringRef name, uint32_t value,
234                                       uint32_t bitwidth) {
235     if (module.getNamedGlobal(name))
236       return;
237     llvm::IntegerType *type =
238         llvm::IntegerType::getIntNTy(module.getContext(), bitwidth);
239     llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
240         module, /*isConstant=*/type, true,
241         llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
242         llvm::ConstantInt::get(type, value), name, /*before=*/nullptr,
243         /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
244         /*addressSpace=*/4);
245     controlVariable->setVisibility(
246         llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
247     controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
248     controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
249   };
250 
251   int abi = 500;
252   abiVer.getAsInteger(0, abi);
253   module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi);
254   // Return if no device libraries are required.
255   if (libs == AMDGCNLibraries::None)
256     return;
257   // Add ocml related control variables.
258   if (any(libs & AMDGCNLibraries::Ocml)) {
259     addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
260     addControlVariable("__oclc_daz_opt", daz || fastMath, 8);
261     addControlVariable("__oclc_correctly_rounded_sqrt32",
262                        correctSqrt && !fastMath, 8);
263     addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
264   }
265   // Add ocml or ockl related control variables.
266   if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) {
267     addControlVariable("__oclc_wavefrontsize64", wave64, 8);
268     // Get the ISA version.
269     llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(chip);
270     // Add the ISA control variable.
271     addControlVariable("__oclc_ISA_version",
272                        isaVersion.Minor + 100 * isaVersion.Stepping +
273                            1000 * isaVersion.Major,
274                        32);
275     addControlVariable("__oclc_ABI_version", abi, 32);
276   }
277 }
278 
279 std::optional<SmallVector<char, 0>>
280 SerializeGPUModuleBase::assembleIsa(StringRef isa) {
281   auto loc = getOperation().getLoc();
282 
283   StringRef targetTriple = this->triple;
284 
285   SmallVector<char, 0> result;
286   llvm::raw_svector_ostream os(result);
287 
288   llvm::Triple triple(llvm::Triple::normalize(targetTriple));
289   std::string error;
290   const llvm::Target *target =
291       llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
292   if (!target) {
293     emitError(loc, Twine("failed to lookup target: ") + error);
294     return std::nullopt;
295   }
296 
297   llvm::SourceMgr srcMgr;
298   srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
299 
300   const llvm::MCTargetOptions mcOptions;
301   std::unique_ptr<llvm::MCRegisterInfo> mri(
302       target->createMCRegInfo(targetTriple));
303   std::unique_ptr<llvm::MCAsmInfo> mai(
304       target->createMCAsmInfo(*mri, targetTriple, mcOptions));
305   std::unique_ptr<llvm::MCSubtargetInfo> sti(
306       target->createMCSubtargetInfo(targetTriple, chip, features));
307 
308   llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
309                       &mcOptions);
310   std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
311       ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
312   ctx.setObjectFileInfo(mofi.get());
313 
314   SmallString<128> cwd;
315   if (!llvm::sys::fs::current_path(cwd))
316     ctx.setCompilationDir(cwd);
317 
318   std::unique_ptr<llvm::MCStreamer> mcStreamer;
319   std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
320 
321   llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx);
322   llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
323   mcStreamer.reset(target->createMCObjectStreamer(
324       triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
325       mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
326       *sti));
327 
328   std::unique_ptr<llvm::MCAsmParser> parser(
329       createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
330   std::unique_ptr<llvm::MCTargetAsmParser> tap(
331       target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
332 
333   if (!tap) {
334     emitError(loc, "assembler initialization error");
335     return std::nullopt;
336   }
337 
338   parser->setTargetParser(*tap);
339   parser->Run(false);
340   return std::move(result);
341 }
342 
343 std::optional<SmallVector<char, 0>>
344 SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
345   // Assemble the ISA.
346   std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
347 
348   if (!isaBinary) {
349     getOperation().emitError() << "failed during ISA assembling";
350     return std::nullopt;
351   }
352 
353   // Save the ISA binary to a temp file.
354   int tempIsaBinaryFd = -1;
355   SmallString<128> tempIsaBinaryFilename;
356   if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd,
357                                          tempIsaBinaryFilename)) {
358     getOperation().emitError()
359         << "failed to create a temporary file for dumping the ISA binary";
360     return std::nullopt;
361   }
362   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
363   {
364     llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
365     tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
366     tempIsaBinaryOs.flush();
367   }
368 
369   // Create a temp file for HSA code object.
370   SmallString<128> tempHsacoFilename;
371   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
372                                          tempHsacoFilename)) {
373     getOperation().emitError()
374         << "failed to create a temporary file for the HSA code object";
375     return std::nullopt;
376   }
377   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
378 
379   llvm::SmallString<128> lldPath(toolkitPath);
380   llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld");
381   int lldResult = llvm::sys::ExecuteAndWait(
382       lldPath,
383       {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
384   if (lldResult != 0) {
385     getOperation().emitError() << "lld invocation failed";
386     return std::nullopt;
387   }
388 
389   // Load the HSA code object.
390   auto hsacoFile =
391       llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false);
392   if (!hsacoFile) {
393     getOperation().emitError()
394         << "failed to read the HSA code object from the temp file";
395     return std::nullopt;
396   }
397 
398   StringRef buffer = (*hsacoFile)->getBuffer();
399 
400   return SmallVector<char, 0>(buffer.begin(), buffer.end());
401 }
402 
403 std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
404     const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
405   // Return LLVM IR if the compilation target is offload.
406 #define DEBUG_TYPE "serialize-to-llvm"
407   LLVM_DEBUG({
408     llvm::dbgs() << "LLVM IR for module: "
409                  << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
410                  << llvmModule << "\n";
411   });
412 #undef DEBUG_TYPE
413   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
414     return SerializeGPUModuleBase::moduleToObject(llvmModule);
415 
416   std::optional<llvm::TargetMachine *> targetMachine =
417       getOrCreateTargetMachine();
418   if (!targetMachine) {
419     getOperation().emitError() << "target Machine unavailable for triple "
420                                << triple << ", can't compile with LLVM";
421     return std::nullopt;
422   }
423 
424   // Translate the Module to ISA.
425   std::optional<std::string> serializedISA =
426       translateToISA(llvmModule, **targetMachine);
427   if (!serializedISA) {
428     getOperation().emitError() << "failed translating the module to ISA";
429     return std::nullopt;
430   }
431 #define DEBUG_TYPE "serialize-to-isa"
432   LLVM_DEBUG({
433     llvm::dbgs() << "ISA for module: "
434                  << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
435                  << *serializedISA << "\n";
436   });
437 #undef DEBUG_TYPE
438   // Return ISA assembly code if the compilation target is assembly.
439   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
440     return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
441 
442   // Compiling to binary requires a valid ROCm path, fail if it's not found.
443   if (getToolkitPath().empty()) {
444     getOperation().emitError() << "invalid ROCm path, please set a valid path";
445     return std::nullopt;
446   }
447 
448   // Compile to binary.
449   return compileToBinary(*serializedISA);
450 }
451 
452 #if MLIR_ENABLE_ROCM_CONVERSIONS
453 namespace {
454 class AMDGPUSerializer : public SerializeGPUModuleBase {
455 public:
456   AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
457                    const gpu::TargetOptions &targetOptions);
458 
459   std::optional<SmallVector<char, 0>>
460   moduleToObject(llvm::Module &llvmModule) override;
461 
462 private:
463   // Target options.
464   gpu::TargetOptions targetOptions;
465 };
466 } // namespace
467 
468 AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
469                                    const gpu::TargetOptions &targetOptions)
470     : SerializeGPUModuleBase(module, target, targetOptions),
471       targetOptions(targetOptions) {}
472 
473 std::optional<SmallVector<char, 0>>
474 AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
475   return moduleToObjectImpl(targetOptions, llvmModule);
476 }
477 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
478 
479 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
480     Attribute attribute, Operation *module,
481     const gpu::TargetOptions &options) const {
482   assert(module && "The module must be non null.");
483   if (!module)
484     return std::nullopt;
485   if (!mlir::isa<gpu::GPUModuleOp>(module)) {
486     module->emitError("module must be a GPU module");
487     return std::nullopt;
488   }
489 #if MLIR_ENABLE_ROCM_CONVERSIONS
490   AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
491                               options);
492   serializer.init();
493   return serializer.run();
494 #else
495   module->emitError("the `AMDGPU` target was not built. Please enable it when "
496                     "building LLVM");
497   return std::nullopt;
498 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
499 }
500 
501 Attribute
502 ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
503                                   const SmallVector<char, 0> &object,
504                                   const gpu::TargetOptions &options) const {
505   gpu::CompilationTarget format = options.getCompilationTarget();
506   // If format is `fatbin` transform it to binary as `fatbin` is not yet
507   // supported.
508   gpu::KernelTableAttr kernels;
509   if (format > gpu::CompilationTarget::Binary) {
510     format = gpu::CompilationTarget::Binary;
511     kernels = ROCDL::getKernelMetadata(module, object);
512   }
513   DictionaryAttr properties{};
514   Builder builder(attribute.getContext());
515   StringAttr objectStr =
516       builder.getStringAttr(StringRef(object.data(), object.size()));
517   return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
518                                           properties, kernels);
519 }
520