//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This files defines ROCDL target related utility classes and functions. // //===----------------------------------------------------------------------===// #include "mlir/Target/LLVM/ROCDL/Utils.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "llvm/ADT/StringMap.h" #include "llvm/Frontend/Offloading/Utility.h" using namespace mlir; using namespace mlir::ROCDL; std::optional> mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef elfData) { uint16_t elfABIVersion; llvm::StringMap kernels; llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer"); // Get the metadata. llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( buffer, kernels, elfABIVersion); // Return `nullopt` if the metadata couldn't be retrieved. if (error) { llvm::consumeError(std::move(error)); return std::nullopt; } // Helper lambda for converting values. auto getI32Array = [&builder](const uint32_t *array) { return builder.getDenseI32ArrayAttr({static_cast(array[0]), static_cast(array[1]), static_cast(array[2])}); }; DenseMap kernelMD; for (const auto &[name, kernel] : kernels) { NamedAttrList attrs; // Add kernel metadata. attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount)); attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount)); attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount)); attrs.append("sgpr_spill_count", builder.getI64IntegerAttr(kernel.SGPRSpillCount)); attrs.append("vgpr_spill_count", builder.getI64IntegerAttr(kernel.VGPRSpillCount)); attrs.append("wavefront_size", builder.getI64IntegerAttr(kernel.WavefrontSize)); attrs.append("max_flat_workgroup_size", builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize)); attrs.append("group_segment_fixed_size", builder.getI64IntegerAttr(kernel.GroupSegmentList)); attrs.append("private_segment_fixed_size", builder.getI64IntegerAttr(kernel.PrivateSegmentSize)); attrs.append("reqd_workgroup_size", getI32Array(kernel.RequestedWorkgroupSize)); attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint)); kernelMD[builder.getStringAttr(name)] = std::move(attrs); } return std::move(kernelMD); } gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, ArrayRef elfData) { auto module = cast(gpuModule); Builder builder(module.getContext()); SmallVector kernels; std::optional> mdMapOrNull = getAMDHSAKernelsELFMetadata(builder, elfData); for (auto funcOp : module.getBody()->getOps()) { if (!funcOp->getDiscardableAttr("rocdl.kernel")) continue; kernels.push_back(gpu::KernelMetadataAttr::get( funcOp, mdMapOrNull ? builder.getDictionaryAttr( mdMapOrNull->lookup(funcOp.getNameAttr())) : nullptr)); } return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels); }