1 //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This files defines ROCDL target related functions including registration 10 // calls for the `#rocdl.target` compilation attribute. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Target/LLVM/ROCDL/Target.h" 15 16 #include "mlir/Dialect/GPU/IR/GPUDialect.h" 17 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" 18 #include "mlir/Support/FileUtilities.h" 19 #include "mlir/Target/LLVM/ROCDL/Utils.h" 20 #include "mlir/Target/LLVMIR/Export.h" 21 22 #include "llvm/IR/Constants.h" 23 #include "llvm/MC/MCAsmBackend.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCCodeEmitter.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCInstrInfo.h" 28 #include "llvm/MC/MCObjectFileInfo.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/FileUtilities.h" 37 #include "llvm/Support/Path.h" 38 #include "llvm/Support/Program.h" 39 #include "llvm/Support/SourceMgr.h" 40 #include "llvm/Support/TargetSelect.h" 41 #include "llvm/TargetParser/TargetParser.h" 42 43 #include <cstdlib> 44 #include <optional> 45 46 using namespace mlir; 47 using namespace mlir::ROCDL; 48 49 #ifndef __DEFAULT_ROCM_PATH__ 50 #define __DEFAULT_ROCM_PATH__ "" 51 #endif 52 53 namespace { 54 // Implementation of the `TargetAttrInterface` model. 55 class ROCDLTargetAttrImpl 56 : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> { 57 public: 58 std::optional<SmallVector<char, 0>> 59 serializeToObject(Attribute attribute, Operation *module, 60 const gpu::TargetOptions &options) const; 61 62 Attribute createObject(Attribute attribute, Operation *module, 63 const SmallVector<char, 0> &object, 64 const gpu::TargetOptions &options) const; 65 }; 66 } // namespace 67 68 // Register the ROCDL dialect, the ROCDL translation and the target interface. 69 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( 70 DialectRegistry ®istry) { 71 registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) { 72 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx); 73 }); 74 } 75 76 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( 77 MLIRContext &context) { 78 DialectRegistry registry; 79 registerROCDLTargetInterfaceExternalModels(registry); 80 context.appendDialectRegistry(registry); 81 } 82 83 // Search for the ROCM path. 84 StringRef mlir::ROCDL::getROCMPath() { 85 if (const char *var = std::getenv("ROCM_PATH")) 86 return var; 87 if (const char *var = std::getenv("ROCM_ROOT")) 88 return var; 89 if (const char *var = std::getenv("ROCM_HOME")) 90 return var; 91 return __DEFAULT_ROCM_PATH__; 92 } 93 94 SerializeGPUModuleBase::SerializeGPUModuleBase( 95 Operation &module, ROCDLTargetAttr target, 96 const gpu::TargetOptions &targetOptions) 97 : ModuleToObject(module, target.getTriple(), target.getChip(), 98 target.getFeatures(), target.getO()), 99 target(target), toolkitPath(targetOptions.getToolkitPath()), 100 librariesToLink(targetOptions.getLibrariesToLink()) { 101 102 // If `targetOptions` has an empty toolkitPath use `getROCMPath` 103 if (toolkitPath.empty()) 104 toolkitPath = getROCMPath(); 105 106 // Append the files in the target attribute. 107 if (target.getLink()) 108 librariesToLink.append(target.getLink().begin(), target.getLink().end()); 109 } 110 111 void SerializeGPUModuleBase::init() { 112 static llvm::once_flag initializeBackendOnce; 113 llvm::call_once(initializeBackendOnce, []() { 114 // If the `AMDGPU` LLVM target was built, initialize it. 115 #if MLIR_ENABLE_ROCM_CONVERSIONS 116 LLVMInitializeAMDGPUTarget(); 117 LLVMInitializeAMDGPUTargetInfo(); 118 LLVMInitializeAMDGPUTargetMC(); 119 LLVMInitializeAMDGPUAsmParser(); 120 LLVMInitializeAMDGPUAsmPrinter(); 121 #endif 122 }); 123 } 124 125 ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } 126 127 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } 128 129 ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const { 130 return librariesToLink; 131 } 132 133 LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { 134 if (libs == AMDGCNLibraries::None) 135 return success(); 136 StringRef pathRef = getToolkitPath(); 137 138 // Get the path for the device libraries 139 SmallString<256> path; 140 path.insert(path.begin(), pathRef.begin(), pathRef.end()); 141 llvm::sys::path::append(path, "amdgcn", "bitcode"); 142 pathRef = StringRef(path.data(), path.size()); 143 144 // Fail if the path is invalid. 145 if (!llvm::sys::fs::is_directory(pathRef)) { 146 getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef 147 << " does not exist or is not a directory"; 148 return failure(); 149 } 150 151 // Helper function for adding a library. 152 auto addLib = [&](const Twine &lib) -> bool { 153 auto baseSize = path.size(); 154 llvm::sys::path::append(path, lib); 155 StringRef pathRef(path.data(), path.size()); 156 if (!llvm::sys::fs::is_regular_file(pathRef)) { 157 getOperation().emitRemark() << "bitcode library path: " << pathRef 158 << " does not exist or is not a file"; 159 return true; 160 } 161 librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef)); 162 path.truncate(baseSize); 163 return false; 164 }; 165 166 // Add ROCm device libraries. Fail if any of the libraries is not found, ie. 167 // if any of the `addLib` failed. 168 if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || 169 (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || 170 (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || 171 (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc"))) 172 return failure(); 173 return success(); 174 } 175 176 std::optional<SmallVector<std::unique_ptr<llvm::Module>>> 177 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { 178 // Return if there are no libs to load. 179 if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty()) 180 return SmallVector<std::unique_ptr<llvm::Module>>(); 181 if (failed(appendStandardLibs(deviceLibs))) 182 return std::nullopt; 183 SmallVector<std::unique_ptr<llvm::Module>> bcFiles; 184 if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink, 185 bcFiles, true))) 186 return std::nullopt; 187 return std::move(bcFiles); 188 } 189 190 LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { 191 // Some ROCM builds don't strip this like they should 192 if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) 193 module.eraseNamedMetadata(openclVersion); 194 // Stop spamming us with clang version numbers 195 if (auto *ident = module.getNamedMetadata("llvm.ident")) 196 module.eraseNamedMetadata(ident); 197 // Override the libModules datalayout and target triple with the compiler's 198 // data layout should there be a discrepency. 199 setDataLayoutAndTriple(module); 200 return success(); 201 } 202 203 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { 204 // If all libraries are not set, traverse the module to determine which 205 // libraries are required. 206 if (deviceLibs != AMDGCNLibraries::All) { 207 for (llvm::Function &f : module.functions()) { 208 if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { 209 StringRef funcName = f.getName(); 210 if ("printf" == funcName) 211 deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | 212 AMDGCNLibraries::Ocml; 213 if (funcName.starts_with("__ockl_")) 214 deviceLibs |= AMDGCNLibraries::Ockl; 215 if (funcName.starts_with("__ocml_")) 216 deviceLibs |= AMDGCNLibraries::Ocml; 217 if (funcName == "__atomic_work_item_fence") 218 deviceLibs |= AMDGCNLibraries::Hip; 219 } 220 } 221 } 222 addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), 223 target.hasFiniteOnly(), target.hasUnsafeMath(), 224 target.hasFastMath(), target.hasCorrectSqrt(), 225 target.getAbi()); 226 } 227 228 void SerializeGPUModuleBase::addControlVariables( 229 llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, 230 bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, 231 StringRef abiVer) { 232 // Helper function for adding control variables. 233 auto addControlVariable = [&module](StringRef name, uint32_t value, 234 uint32_t bitwidth) { 235 if (module.getNamedGlobal(name)) 236 return; 237 llvm::IntegerType *type = 238 llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); 239 llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( 240 module, /*isConstant=*/type, true, 241 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, 242 llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, 243 /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 244 /*addressSpace=*/4); 245 controlVariable->setVisibility( 246 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); 247 controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); 248 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); 249 }; 250 251 int abi = 500; 252 abiVer.getAsInteger(0, abi); 253 module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi); 254 // Return if no device libraries are required. 255 if (libs == AMDGCNLibraries::None) 256 return; 257 // Add ocml related control variables. 258 if (any(libs & AMDGCNLibraries::Ocml)) { 259 addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); 260 addControlVariable("__oclc_daz_opt", daz || fastMath, 8); 261 addControlVariable("__oclc_correctly_rounded_sqrt32", 262 correctSqrt && !fastMath, 8); 263 addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); 264 } 265 // Add ocml or ockl related control variables. 266 if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { 267 addControlVariable("__oclc_wavefrontsize64", wave64, 8); 268 // Get the ISA version. 269 llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(chip); 270 // Add the ISA control variable. 271 addControlVariable("__oclc_ISA_version", 272 isaVersion.Minor + 100 * isaVersion.Stepping + 273 1000 * isaVersion.Major, 274 32); 275 addControlVariable("__oclc_ABI_version", abi, 32); 276 } 277 } 278 279 std::optional<SmallVector<char, 0>> 280 SerializeGPUModuleBase::assembleIsa(StringRef isa) { 281 auto loc = getOperation().getLoc(); 282 283 StringRef targetTriple = this->triple; 284 285 SmallVector<char, 0> result; 286 llvm::raw_svector_ostream os(result); 287 288 llvm::Triple triple(llvm::Triple::normalize(targetTriple)); 289 std::string error; 290 const llvm::Target *target = 291 llvm::TargetRegistry::lookupTarget(triple.normalize(), error); 292 if (!target) { 293 emitError(loc, Twine("failed to lookup target: ") + error); 294 return std::nullopt; 295 } 296 297 llvm::SourceMgr srcMgr; 298 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); 299 300 const llvm::MCTargetOptions mcOptions; 301 std::unique_ptr<llvm::MCRegisterInfo> mri( 302 target->createMCRegInfo(targetTriple)); 303 std::unique_ptr<llvm::MCAsmInfo> mai( 304 target->createMCAsmInfo(*mri, targetTriple, mcOptions)); 305 std::unique_ptr<llvm::MCSubtargetInfo> sti( 306 target->createMCSubtargetInfo(targetTriple, chip, features)); 307 308 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, 309 &mcOptions); 310 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( 311 ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); 312 ctx.setObjectFileInfo(mofi.get()); 313 314 SmallString<128> cwd; 315 if (!llvm::sys::fs::current_path(cwd)) 316 ctx.setCompilationDir(cwd); 317 318 std::unique_ptr<llvm::MCStreamer> mcStreamer; 319 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); 320 321 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); 322 llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); 323 mcStreamer.reset(target->createMCObjectStreamer( 324 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), 325 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), 326 *sti)); 327 328 std::unique_ptr<llvm::MCAsmParser> parser( 329 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); 330 std::unique_ptr<llvm::MCTargetAsmParser> tap( 331 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); 332 333 if (!tap) { 334 emitError(loc, "assembler initialization error"); 335 return std::nullopt; 336 } 337 338 parser->setTargetParser(*tap); 339 parser->Run(false); 340 return std::move(result); 341 } 342 343 std::optional<SmallVector<char, 0>> 344 SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { 345 // Assemble the ISA. 346 std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA); 347 348 if (!isaBinary) { 349 getOperation().emitError() << "failed during ISA assembling"; 350 return std::nullopt; 351 } 352 353 // Save the ISA binary to a temp file. 354 int tempIsaBinaryFd = -1; 355 SmallString<128> tempIsaBinaryFilename; 356 if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, 357 tempIsaBinaryFilename)) { 358 getOperation().emitError() 359 << "failed to create a temporary file for dumping the ISA binary"; 360 return std::nullopt; 361 } 362 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); 363 { 364 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); 365 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size()); 366 tempIsaBinaryOs.flush(); 367 } 368 369 // Create a temp file for HSA code object. 370 SmallString<128> tempHsacoFilename; 371 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", 372 tempHsacoFilename)) { 373 getOperation().emitError() 374 << "failed to create a temporary file for the HSA code object"; 375 return std::nullopt; 376 } 377 llvm::FileRemover cleanupHsaco(tempHsacoFilename); 378 379 llvm::SmallString<128> lldPath(toolkitPath); 380 llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); 381 int lldResult = llvm::sys::ExecuteAndWait( 382 lldPath, 383 {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); 384 if (lldResult != 0) { 385 getOperation().emitError() << "lld invocation failed"; 386 return std::nullopt; 387 } 388 389 // Load the HSA code object. 390 auto hsacoFile = 391 llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); 392 if (!hsacoFile) { 393 getOperation().emitError() 394 << "failed to read the HSA code object from the temp file"; 395 return std::nullopt; 396 } 397 398 StringRef buffer = (*hsacoFile)->getBuffer(); 399 400 return SmallVector<char, 0>(buffer.begin(), buffer.end()); 401 } 402 403 std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl( 404 const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { 405 // Return LLVM IR if the compilation target is offload. 406 #define DEBUG_TYPE "serialize-to-llvm" 407 LLVM_DEBUG({ 408 llvm::dbgs() << "LLVM IR for module: " 409 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" 410 << llvmModule << "\n"; 411 }); 412 #undef DEBUG_TYPE 413 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) 414 return SerializeGPUModuleBase::moduleToObject(llvmModule); 415 416 std::optional<llvm::TargetMachine *> targetMachine = 417 getOrCreateTargetMachine(); 418 if (!targetMachine) { 419 getOperation().emitError() << "target Machine unavailable for triple " 420 << triple << ", can't compile with LLVM"; 421 return std::nullopt; 422 } 423 424 // Translate the Module to ISA. 425 std::optional<std::string> serializedISA = 426 translateToISA(llvmModule, **targetMachine); 427 if (!serializedISA) { 428 getOperation().emitError() << "failed translating the module to ISA"; 429 return std::nullopt; 430 } 431 #define DEBUG_TYPE "serialize-to-isa" 432 LLVM_DEBUG({ 433 llvm::dbgs() << "ISA for module: " 434 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" 435 << *serializedISA << "\n"; 436 }); 437 #undef DEBUG_TYPE 438 // Return ISA assembly code if the compilation target is assembly. 439 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) 440 return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end()); 441 442 // Compiling to binary requires a valid ROCm path, fail if it's not found. 443 if (getToolkitPath().empty()) { 444 getOperation().emitError() << "invalid ROCm path, please set a valid path"; 445 return std::nullopt; 446 } 447 448 // Compile to binary. 449 return compileToBinary(*serializedISA); 450 } 451 452 #if MLIR_ENABLE_ROCM_CONVERSIONS 453 namespace { 454 class AMDGPUSerializer : public SerializeGPUModuleBase { 455 public: 456 AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, 457 const gpu::TargetOptions &targetOptions); 458 459 std::optional<SmallVector<char, 0>> 460 moduleToObject(llvm::Module &llvmModule) override; 461 462 private: 463 // Target options. 464 gpu::TargetOptions targetOptions; 465 }; 466 } // namespace 467 468 AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, 469 const gpu::TargetOptions &targetOptions) 470 : SerializeGPUModuleBase(module, target, targetOptions), 471 targetOptions(targetOptions) {} 472 473 std::optional<SmallVector<char, 0>> 474 AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { 475 return moduleToObjectImpl(targetOptions, llvmModule); 476 } 477 #endif // MLIR_ENABLE_ROCM_CONVERSIONS 478 479 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( 480 Attribute attribute, Operation *module, 481 const gpu::TargetOptions &options) const { 482 assert(module && "The module must be non null."); 483 if (!module) 484 return std::nullopt; 485 if (!mlir::isa<gpu::GPUModuleOp>(module)) { 486 module->emitError("module must be a GPU module"); 487 return std::nullopt; 488 } 489 #if MLIR_ENABLE_ROCM_CONVERSIONS 490 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute), 491 options); 492 serializer.init(); 493 return serializer.run(); 494 #else 495 module->emitError("the `AMDGPU` target was not built. Please enable it when " 496 "building LLVM"); 497 return std::nullopt; 498 #endif // MLIR_ENABLE_ROCM_CONVERSIONS 499 } 500 501 Attribute 502 ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, 503 const SmallVector<char, 0> &object, 504 const gpu::TargetOptions &options) const { 505 gpu::CompilationTarget format = options.getCompilationTarget(); 506 // If format is `fatbin` transform it to binary as `fatbin` is not yet 507 // supported. 508 gpu::KernelTableAttr kernels; 509 if (format > gpu::CompilationTarget::Binary) { 510 format = gpu::CompilationTarget::Binary; 511 kernels = ROCDL::getKernelMetadata(module, object); 512 } 513 DictionaryAttr properties{}; 514 Builder builder(attribute.getContext()); 515 StringAttr objectStr = 516 builder.getStringAttr(StringRef(object.data(), object.size())); 517 return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr, 518 properties, kernels); 519 } 520