//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This files defines ROCDL target related functions including registration // calls for the `#rocdl.target` compilation attribute. // //===----------------------------------------------------------------------===// #include "mlir/Target/LLVM/ROCDL/Target.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVM/ROCDL/Utils.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/Constants.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/TargetParser/TargetParser.h" #include #include using namespace mlir; using namespace mlir::ROCDL; #ifndef __DEFAULT_ROCM_PATH__ #define __DEFAULT_ROCM_PATH__ "" #endif namespace { // Implementation of the `TargetAttrInterface` model. class ROCDLTargetAttrImpl : public gpu::TargetAttrInterface::FallbackModel { public: std::optional> serializeToObject(Attribute attribute, Operation *module, const gpu::TargetOptions &options) const; Attribute createObject(Attribute attribute, Operation *module, const SmallVector &object, const gpu::TargetOptions &options) const; }; } // namespace // Register the ROCDL dialect, the ROCDL translation and the target interface. void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) { ROCDLTargetAttr::attachInterface(*ctx); }); } void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( MLIRContext &context) { DialectRegistry registry; registerROCDLTargetInterfaceExternalModels(registry); context.appendDialectRegistry(registry); } // Search for the ROCM path. StringRef mlir::ROCDL::getROCMPath() { if (const char *var = std::getenv("ROCM_PATH")) return var; if (const char *var = std::getenv("ROCM_ROOT")) return var; if (const char *var = std::getenv("ROCM_HOME")) return var; return __DEFAULT_ROCM_PATH__; } SerializeGPUModuleBase::SerializeGPUModuleBase( Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions) : ModuleToObject(module, target.getTriple(), target.getChip(), target.getFeatures(), target.getO()), target(target), toolkitPath(targetOptions.getToolkitPath()), librariesToLink(targetOptions.getLibrariesToLink()) { // If `targetOptions` has an empty toolkitPath use `getROCMPath` if (toolkitPath.empty()) toolkitPath = getROCMPath(); // Append the files in the target attribute. if (target.getLink()) librariesToLink.append(target.getLink().begin(), target.getLink().end()); } void SerializeGPUModuleBase::init() { static llvm::once_flag initializeBackendOnce; llvm::call_once(initializeBackendOnce, []() { // If the `AMDGPU` LLVM target was built, initialize it. #if MLIR_ENABLE_ROCM_CONVERSIONS LLVMInitializeAMDGPUTarget(); LLVMInitializeAMDGPUTargetInfo(); LLVMInitializeAMDGPUTargetMC(); LLVMInitializeAMDGPUAsmParser(); LLVMInitializeAMDGPUAsmPrinter(); #endif }); } ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } ArrayRef SerializeGPUModuleBase::getLibrariesToLink() const { return librariesToLink; } LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { if (libs == AMDGCNLibraries::None) return success(); StringRef pathRef = getToolkitPath(); // Get the path for the device libraries SmallString<256> path; path.insert(path.begin(), pathRef.begin(), pathRef.end()); llvm::sys::path::append(path, "amdgcn", "bitcode"); pathRef = StringRef(path.data(), path.size()); // Fail if the path is invalid. if (!llvm::sys::fs::is_directory(pathRef)) { getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef << " does not exist or is not a directory"; return failure(); } // Helper function for adding a library. auto addLib = [&](const Twine &lib) -> bool { auto baseSize = path.size(); llvm::sys::path::append(path, lib); StringRef pathRef(path.data(), path.size()); if (!llvm::sys::fs::is_regular_file(pathRef)) { getOperation().emitRemark() << "bitcode library path: " << pathRef << " does not exist or is not a file"; return true; } librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef)); path.truncate(baseSize); return false; }; // Add ROCm device libraries. Fail if any of the libraries is not found, ie. // if any of the `addLib` failed. if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc"))) return failure(); return success(); } std::optional>> SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { // Return if there are no libs to load. if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty()) return SmallVector>(); if (failed(appendStandardLibs(deviceLibs))) return std::nullopt; SmallVector> bcFiles; if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink, bcFiles, true))) return std::nullopt; return std::move(bcFiles); } LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Some ROCM builds don't strip this like they should if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) module.eraseNamedMetadata(openclVersion); // Stop spamming us with clang version numbers if (auto *ident = module.getNamedMetadata("llvm.ident")) module.eraseNamedMetadata(ident); // Override the libModules datalayout and target triple with the compiler's // data layout should there be a discrepency. setDataLayoutAndTriple(module); return success(); } void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { // If all libraries are not set, traverse the module to determine which // libraries are required. if (deviceLibs != AMDGCNLibraries::All) { for (llvm::Function &f : module.functions()) { if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { StringRef funcName = f.getName(); if ("printf" == funcName) deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | AMDGCNLibraries::Ocml; if (funcName.starts_with("__ockl_")) deviceLibs |= AMDGCNLibraries::Ockl; if (funcName.starts_with("__ocml_")) deviceLibs |= AMDGCNLibraries::Ocml; if (funcName == "__atomic_work_item_fence") deviceLibs |= AMDGCNLibraries::Hip; } } } addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), target.getAbi()); } void SerializeGPUModuleBase::addControlVariables( llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { // Helper function for adding control variables. auto addControlVariable = [&module](StringRef name, uint32_t value, uint32_t bitwidth) { if (module.getNamedGlobal(name)) return; llvm::IntegerType *type = llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( module, /*isConstant=*/type, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, /*addressSpace=*/4); controlVariable->setVisibility( llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); }; int abi = 500; abiVer.getAsInteger(0, abi); module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi); // Return if no device libraries are required. if (libs == AMDGCNLibraries::None) return; // Add ocml related control variables. if (any(libs & AMDGCNLibraries::Ocml)) { addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); addControlVariable("__oclc_daz_opt", daz || fastMath, 8); addControlVariable("__oclc_correctly_rounded_sqrt32", correctSqrt && !fastMath, 8); addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); } // Add ocml or ockl related control variables. if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { addControlVariable("__oclc_wavefrontsize64", wave64, 8); // Get the ISA version. llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(chip); // Add the ISA control variable. addControlVariable("__oclc_ISA_version", isaVersion.Minor + 100 * isaVersion.Stepping + 1000 * isaVersion.Major, 32); addControlVariable("__oclc_ABI_version", abi, 32); } } std::optional> SerializeGPUModuleBase::assembleIsa(StringRef isa) { auto loc = getOperation().getLoc(); StringRef targetTriple = this->triple; SmallVector result; llvm::raw_svector_ostream os(result); llvm::Triple triple(llvm::Triple::normalize(targetTriple)); std::string error; const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple.normalize(), error); if (!target) { emitError(loc, Twine("failed to lookup target: ") + error); return std::nullopt; } llvm::SourceMgr srcMgr; srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); const llvm::MCTargetOptions mcOptions; std::unique_ptr mri( target->createMCRegInfo(targetTriple)); std::unique_ptr mai( target->createMCAsmInfo(*mri, targetTriple, mcOptions)); std::unique_ptr sti( target->createMCSubtargetInfo(targetTriple, chip, features)); llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, &mcOptions); std::unique_ptr mofi(target->createMCObjectFileInfo( ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); ctx.setObjectFileInfo(mofi.get()); SmallString<128> cwd; if (!llvm::sys::fs::current_path(cwd)) ctx.setCompilationDir(cwd); std::unique_ptr mcStreamer; std::unique_ptr mcii(target->createMCInstrInfo()); llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); mcStreamer.reset(target->createMCObjectStreamer( triple, ctx, std::unique_ptr(mab), mab->createObjectWriter(os), std::unique_ptr(ce), *sti)); std::unique_ptr parser( createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); std::unique_ptr tap( target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); if (!tap) { emitError(loc, "assembler initialization error"); return std::nullopt; } parser->setTargetParser(*tap); parser->Run(false); return std::move(result); } std::optional> SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { // Assemble the ISA. std::optional> isaBinary = assembleIsa(serializedISA); if (!isaBinary) { getOperation().emitError() << "failed during ISA assembling"; return std::nullopt; } // Save the ISA binary to a temp file. int tempIsaBinaryFd = -1; SmallString<128> tempIsaBinaryFilename; if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, tempIsaBinaryFilename)) { getOperation().emitError() << "failed to create a temporary file for dumping the ISA binary"; return std::nullopt; } llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); { llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size()); tempIsaBinaryOs.flush(); } // Create a temp file for HSA code object. SmallString<128> tempHsacoFilename; if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) { getOperation().emitError() << "failed to create a temporary file for the HSA code object"; return std::nullopt; } llvm::FileRemover cleanupHsaco(tempHsacoFilename); llvm::SmallString<128> lldPath(toolkitPath); llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); int lldResult = llvm::sys::ExecuteAndWait( lldPath, {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); if (lldResult != 0) { getOperation().emitError() << "lld invocation failed"; return std::nullopt; } // Load the HSA code object. auto hsacoFile = llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); if (!hsacoFile) { getOperation().emitError() << "failed to read the HSA code object from the temp file"; return std::nullopt; } StringRef buffer = (*hsacoFile)->getBuffer(); return SmallVector(buffer.begin(), buffer.end()); } std::optional> SerializeGPUModuleBase::moduleToObjectImpl( const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ llvm::dbgs() << "LLVM IR for module: " << cast(getOperation()).getNameAttr() << "\n" << llvmModule << "\n"; }); #undef DEBUG_TYPE if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) return SerializeGPUModuleBase::moduleToObject(llvmModule); std::optional targetMachine = getOrCreateTargetMachine(); if (!targetMachine) { getOperation().emitError() << "target Machine unavailable for triple " << triple << ", can't compile with LLVM"; return std::nullopt; } // Translate the Module to ISA. std::optional serializedISA = translateToISA(llvmModule, **targetMachine); if (!serializedISA) { getOperation().emitError() << "failed translating the module to ISA"; return std::nullopt; } #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ llvm::dbgs() << "ISA for module: " << cast(getOperation()).getNameAttr() << "\n" << *serializedISA << "\n"; }); #undef DEBUG_TYPE // Return ISA assembly code if the compilation target is assembly. if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) return SmallVector(serializedISA->begin(), serializedISA->end()); // Compiling to binary requires a valid ROCm path, fail if it's not found. if (getToolkitPath().empty()) { getOperation().emitError() << "invalid ROCm path, please set a valid path"; return std::nullopt; } // Compile to binary. return compileToBinary(*serializedISA); } #if MLIR_ENABLE_ROCM_CONVERSIONS namespace { class AMDGPUSerializer : public SerializeGPUModuleBase { public: AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions); std::optional> moduleToObject(llvm::Module &llvmModule) override; private: // Target options. gpu::TargetOptions targetOptions; }; } // namespace AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions) : SerializeGPUModuleBase(module, target, targetOptions), targetOptions(targetOptions) {} std::optional> AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { return moduleToObjectImpl(targetOptions, llvmModule); } #endif // MLIR_ENABLE_ROCM_CONVERSIONS std::optional> ROCDLTargetAttrImpl::serializeToObject( Attribute attribute, Operation *module, const gpu::TargetOptions &options) const { assert(module && "The module must be non null."); if (!module) return std::nullopt; if (!mlir::isa(module)) { module->emitError("module must be a GPU module"); return std::nullopt; } #if MLIR_ENABLE_ROCM_CONVERSIONS AMDGPUSerializer serializer(*module, cast(attribute), options); serializer.init(); return serializer.run(); #else module->emitError("the `AMDGPU` target was not built. Please enable it when " "building LLVM"); return std::nullopt; #endif // MLIR_ENABLE_ROCM_CONVERSIONS } Attribute ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, const SmallVector &object, const gpu::TargetOptions &options) const { gpu::CompilationTarget format = options.getCompilationTarget(); // If format is `fatbin` transform it to binary as `fatbin` is not yet // supported. gpu::KernelTableAttr kernels; if (format > gpu::CompilationTarget::Binary) { format = gpu::CompilationTarget::Binary; kernels = ROCDL::getKernelMetadata(module, object); } DictionaryAttr properties{}; Builder builder(attribute.getContext()); StringAttr objectStr = builder.getStringAttr(StringRef(object.data(), object.size())); return builder.getAttr(attribute, format, objectStr, properties, kernels); }