//===-- Passes.td - GPU pass definition file ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_GPU_PASSES
#define MLIR_DIALECT_GPU_PASSES

include "mlir/Pass/PassBase.td"

// Declares the `gpu-launch-sink-index-computations` pass (no anchor op given).
// NOTE(review): the constructor name is spelled "Lauch" (sic). It has to match
// the C++ declaration, so do not correct the spelling here in isolation.
def GpuLaunchSinkIndexComputations : Pass<"gpu-launch-sink-index-computations"> {
  let summary = "Sink index computations into gpu.launch body";
  let constructor = "mlir::createGpuLauchSinkIndexComputationsPass()";
  let dependentDialects = ["mlir::gpu::GPUDialect"];
}

// Declares the `gpu-kernel-outlining` pass, anchored on `ModuleOp`.
def GpuKernelOutlining : Pass<"gpu-kernel-outlining", "ModuleOp"> {
  let summary = "Outline gpu.launch bodies to kernel functions";
  let constructor = "mlir::createGpuKernelOutliningPass()";
  let dependentDialects = ["mlir::DLTIDialect", "cf::ControlFlowDialect"];
}

// Declares the `gpu-async-region` pass, anchored on `func::FuncOp`.
def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> {
  let summary = "Make GPU ops async";
  let constructor = "mlir::createGpuAsyncRegionPass()";
  let dependentDialects = ["async::AsyncDialect"];
}

// Declares the `gpu-map-parallel-loops` pass, anchored on `mlir::func::FuncOp`.
def GpuMapParallelLoopsPass
    : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> {
  let summary = "Greedily maps loops to GPU hardware dimensions.";
  let constructor = "mlir::createGpuMapParallelLoopsPass()";
  let description = "Greedily maps loops to GPU hardware dimensions.";
  let dependentDialects = ["mlir::gpu::GPUDialect"];
}

// Declares the `gpu-eliminate-barriers` pass, anchored on `mlir::func::FuncOp`.
// No explicit `constructor` is specified for this pass.
def GpuEliminateBarriers
    : Pass<"gpu-eliminate-barriers", "mlir::func::FuncOp"> {
  let summary = "Erase unnecessary barriers";
  let description = [{
    Barrier elimination pass. If a barrier does not enforce any conflicting
    pair of memory effects, including a pair that is enforced by another
    barrier, it is unnecessary and can be removed. Adapted from
    "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level
    Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in
    PPoPP 2023 and implementation in Polygeist.
  }];
  let dependentDialects = [
    "mlir::gpu::GPUDialect",
    "mlir::memref::MemRefDialect",
    "mlir::scf::SCFDialect"
  ];
}

// Declares the `gpu-decompose-memrefs` pass (no anchor op given).
def GpuDecomposeMemrefsPass : Pass<"gpu-decompose-memrefs"> {
  let summary = "Decomposes memref index computation into explicit ops.";
  let description = [{
    This pass decomposes memref index computation into explicit computations on
    sizes/strides, obtained from `memref.extract_strided_metadata` which it tries
    to place outside of `gpu.launch` body. Memrefs are then reconstructed using
    `memref.reinterpret_cast`.
    This is needed as some targets (SPIR-V) lower memrefs to bare pointers
    and sizes/strides for dynamically-sized memrefs are not available inside
    `gpu.launch`.
  }];
  let constructor = "mlir::createGpuDecomposeMemrefsPass()";
  let dependentDialects = [
    "mlir::gpu::GPUDialect", "mlir::memref::MemRefDialect",
    "mlir::affine::AffineDialect"
  ];
}

// Declares the `gpu-module-to-binary` pass with an empty anchor-op string.
// No explicit `constructor` is specified; the options below are the pass's
// command-line interface (`toolkit`, `l`, `opts`, `format`, `section`).
def GpuModuleToBinaryPass
    : Pass<"gpu-module-to-binary", ""> {
  let summary = "Transforms a GPU module into a GPU binary.";
  let description = [{
    This pass searches for all nested GPU modules and serializes the module
    using the target attributes attached to the module, producing a GPU binary
    with an object for every target.

    The `format` argument can have the following values:
    1. `offloading`, `llvm`: produces an offloading representation.
    2. `assembly`, `isa`: produces assembly code.
    3. `binary`, `bin`: produces binaries.
    4. `fatbinary`, `fatbin`: produces fatbinaries.
  }];
  let options = [
    Option<"toolkitPath", "toolkit", "std::string", [{""}],
           "Toolkit path.">,
    ListOption<"linkFiles", "l", "std::string",
           "Extra files to link to.">,
    Option<"cmdOptions", "opts", "std::string", [{""}],
           "Command line options to pass to the tools.">,
    Option<"compilationTarget", "format", "std::string", [{"fatbin"}],
           "The target representation of the compilation process.">,
    Option<"elfSection", "section", "std::string", [{""}],
           "ELF section where binary is to be located.">
  ];
}

// Declares the `nvvm-attach-target` pass with an empty anchor-op string.
// Modules are selected by the `module` regex option; the remaining options
// supply the fields of the attached NVVM target.
def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
  let summary = "Attaches an NVVM target attribute to a GPU Module.";
  let description = [{
    This pass searches for all GPU Modules in the immediate regions and attaches
    an NVVM target if the module matches the name specified by the `module` argument.

    Example:
    ```
    // File: in.mlir:
    gpu.module @nvvm_module_1 {...}
    gpu.module @nvvm_module_2 {...}
    gpu.module @rocdl_module_1 {...}
    // mlir-opt --nvvm-attach-target="module=nvvm.* chip=sm_90" in.mlir
    gpu.module @nvvm_module_1 [#nvvm.target<chip = "sm_90">] {...}
    gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_90">] {...}
    gpu.module @rocdl_module_1 {...}
    ```
  }];
  let options = [
    Option<"moduleMatcher", "module", "std::string",
           /*default=*/ [{""}],
           "Regex used to identify the modules to attach the target to.">,
    Option<"triple", "triple", "std::string",
           /*default=*/ "\"nvptx64-nvidia-cuda\"",
           "Target triple.">,
    Option<"chip", "chip", "std::string",
           /*default=*/"\"sm_50\"",
           "Target chip.">,
    Option<"features", "features", "std::string",
           /*default=*/"\"+ptx60\"",
           "Target features.">,
    Option<"optLevel", "O", "unsigned",
           /*default=*/"2",
           "Optimization level.">,
    Option<"fastFlag", "fast", "bool",
           /*default=*/"false",
           "Enable fast math mode.">,
    Option<"ftzFlag", "ftz", "bool",
           /*default=*/"false",
           "Enable flush to zero for denormals.">,
    ListOption<"linkLibs", "l", "std::string",
           "Extra bitcode libraries paths to link to.">,
  ];
}

// Declares the `rocdl-attach-target` pass with an empty anchor-op string.
// Modules are selected by the `module` regex option; the remaining options
// supply the fields of the attached ROCDL target.
def GpuROCDLAttachTarget: Pass<"rocdl-attach-target", ""> {
  let summary = "Attaches a ROCDL target attribute to a GPU Module.";
  let description = [{
    This pass searches for all GPU Modules in the immediate regions and attaches
    a ROCDL target if the module matches the name specified by the `module` argument.

    Example:
    ```
    // File: in.mlir:
    gpu.module @nvvm_module_1 {...}
    gpu.module @nvvm_module_2 {...}
    gpu.module @rocdl_module_1 {...}
    // mlir-opt --rocdl-attach-target="module=rocdl.* chip=gfx90a" in.mlir
    gpu.module @nvvm_module_1 {...}
    gpu.module @nvvm_module_2 {...}
    gpu.module @rocdl_module_1 [#rocdl.target<chip = "gfx90a">] {...}
    ```
  }];
  let options = [
    Option<"moduleMatcher", "module", "std::string",
           /*default=*/ [{""}],
           "Regex used to identify the modules to attach the target to.">,
    Option<"triple", "triple", "std::string",
           /*default=*/ "\"amdgcn-amd-amdhsa\"",
           "Target triple.">,
    Option<"chip", "chip", "std::string",
           /*default=*/"\"gfx900\"",
           "Target chip.">,
    Option<"features", "features", "std::string",
           /*default=*/"\"\"",
           "Target features.">,
    Option<"abiVersion", "abi", "std::string",
           /*default=*/"\"500\"",
           "ABI version.">,
    Option<"optLevel", "O", "unsigned",
           /*default=*/"2",
           "Optimization level.">,
    Option<"wave64Flag", "wave64", "bool",
           /*default=*/"true",
           "Use Wave64 mode.">,
    Option<"fastFlag", "fast", "bool",
           /*default=*/"false",
           "Enable fast relaxed math opt.">,
    Option<"dazFlag", "daz", "bool",
           /*default=*/"false",
           "Enable denormals are zero opt.">,
    Option<"finiteOnlyFlag", "finite-only", "bool",
           /*default=*/"false",
           "Enable finite only opt.">,
    Option<"unsafeMathFlag", "unsafe-math", "bool",
           /*default=*/"false",
           "Enable unsafe math opt.">,
    Option<"correctSqrtFlag", "correct-sqrt", "bool",
           /*default=*/"true",
           "Enable correct rounded sqrt.">,
    ListOption<"linkLibs", "l", "std::string",
           "Extra bitcode libraries paths to link to.">,
  ];
}

// Declares the `spirv-attach-target` pass with an empty anchor-op string.
// Modules are selected by the `module` regex option; the remaining options
// describe the SPIR-V version, capabilities, extensions and device.
// NOTE(review): `deviceId` is declared with an empty default string, unlike
// every other scalar option in this file. Presumably this leaves the option
// without an explicit initializer -- confirm that this is intended.
def GpuSPIRVAttachTarget: Pass<"spirv-attach-target", ""> {
  let summary = "Attaches an SPIR-V target attribute to a GPU Module.";
  let description = [{
    This pass searches for all GPU Modules in the immediate regions and attaches
    an SPIR-V target if the module matches the name specified by the `module` argument.

    Example:
    ```
    // Given the following file: in1.mlir:
    gpu.module @nvvm_module_1 {...}
    gpu.module @spirv_module_1 {...}
    // With
    // mlir-opt --spirv-attach-target="module=spirv.* ver=v1.0 caps=Kernel" in1.mlir
    // it will generate,
    gpu.module @nvvm_module_1 {...}
    gpu.module @spirv_module_1 [#spirv.target<#spirv.vce<v1.0, [Kernel], []>, #spirv.resource_limits<>>] {...}
    ```
  }];
  let options = [
    Option<"moduleMatcher", "module", "std::string",
           /*default=*/ [{""}],
           "Regex used to identify the modules to attach the target to.">,
    Option<"spirvVersion", "ver", "std::string",
           /*default=*/ "\"v1.0\"",
           "SPIR-V Version.">,
    ListOption<"spirvCapabilities", "caps", "std::string",
           "List of supported SPIR-V Capabilities">,
    ListOption<"spirvExtensions", "exts", "std::string",
           "List of supported SPIR-V Extensions">,
    Option<"clientApi", "client_api", "std::string",
           /*default=*/ "\"Unknown\"",
           "Client API">,
    Option<"deviceVendor", "vendor", "std::string",
           /*default=*/ "\"Unknown\"",
           "Device Vendor">,
    Option<"deviceType", "device_type", "std::string",
           /*default=*/ "\"Unknown\"",
           "Device Type">,
    Option<"deviceId", "device_id", "uint32_t",
           /*default=*/ "",
           "Device ID">,
  ];
}

#endif // MLIR_DIALECT_GPU_PASSES