//===-- GPUDeviceMappingAttr.td - Attribute definition -----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines the attributes used to map loops to GPU devices.
//
//===----------------------------------------------------------------------===//

#ifndef GPU_DEVICE_MAPPING_ATTR
#define GPU_DEVICE_MAPPING_ATTR

include "mlir/Dialect/GPU/IR/GPUBase.td"
include "mlir/IR/EnumAttr.td"
include "mlir/Dialect/SCF/IR/DeviceMappingInterface.td"

// Mapping ids used by the 3D mapping mode.
def DimX : I64EnumAttrCase<"DimX", 0, "x">;
def DimY : I64EnumAttrCase<"DimY", 1, "y">;
def DimZ : I64EnumAttrCase<"DimZ", 2, "z">;

// Mapping ids used by the linear mapping mode.
def LinearDim0 : I64EnumAttrCase<"LinearDim0", 3, "linear_dim_0">;
def LinearDim1 : I64EnumAttrCase<"LinearDim1", 4, "linear_dim_1">;
def LinearDim2 : I64EnumAttrCase<"LinearDim2", 5, "linear_dim_2">;
def LinearDim3 : I64EnumAttrCase<"LinearDim3", 6, "linear_dim_3">;
def LinearDim4 : I64EnumAttrCase<"LinearDim4", 7, "linear_dim_4">;
def LinearDim5 : I64EnumAttrCase<"LinearDim5", 8, "linear_dim_5">;
def LinearDim6 : I64EnumAttrCase<"LinearDim6", 9, "linear_dim_6">;
def LinearDim7 : I64EnumAttrCase<"LinearDim7", 10, "linear_dim_7">;
def LinearDim8 : I64EnumAttrCase<"LinearDim8", 11, "linear_dim_8">;
def LinearDim9 : I64EnumAttrCase<"LinearDim9", 12, "linear_dim_9">;

// Single enum shared by the block, warpgroup, warp and thread mapping
// attributes below; it carries both the 3D ids and the linear ids.
//
// TODO: This would be better represented with separate Grid and Linear Mapping
// ids. Unfortunately it is not yet possible to have an optional EnumParameter
// so we currently embed the 2 modes in the same enum.
def MappingIdEnum : I64EnumAttr<"MappingId", "Mapping ids for loop mapping", [
    DimX, DimY, DimZ,
    LinearDim0, LinearDim1, LinearDim2, LinearDim3, LinearDim4,
    LinearDim5, LinearDim6, LinearDim7, LinearDim8, LinearDim9]> {
  let cppNamespace = "::mlir::gpu";
}

def GPUBlockMappingAttr : GPU_Attr<"GPUBlockMapping", "block", [
  DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ] > {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$block
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining thread block parallelism for GPU devices.

    Thread blocks (aka workgroups) are grouped into a grid described by a
    3-dimensional rectangle.
    This attribute indicates that thread block parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D block id is simply the 3D index of the block `(bidx, bidy, bidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the grid.

    #### Linear mapping mode

    The linear block id is obtained by linearizing the index of the block.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D subset of the (linearized) grid.

    For instance, if the basis is denoted as (GX, GY, GZ) and the block id is
    denoted by (bx, by, bz), the linear block id is:
    `linear_id = bx + by * GX + bz * GX * GY`.
    The linear block id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LBD0, LBD1, LBD2, LBD3) the
    block id in the new basis is:

    ```
    (linear_id mod LBD0,
     (linear_id / LBD0) mod LBD1,
     (linear_id / (LBD0 * LBD1)) mod LBD2,
     (linear_id / (LBD0 * LBD1 * LBD2)) mod LBD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}

def GPUWarpgroupMappingAttr
    : GPU_Attr<"GPUWarpgroupMapping", "warpgroup", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$warpgroup
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining warpgroup parallelism for GPU devices.

    Threads of proper granularity (e.g. multiple of
    "kNumWarpsPerGroup * kWarpSize" on CUDA devices) can be grouped into
    warpgroups described by a 3-dimensional rectangle.
    This attribute indicates that warpgroup parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D warpgroup id is simply the adjusted 3D index of the thread
    `(tidx / (kNumWarpsPerGroup * kWarpSize), tidy, tidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the warpgroups.

    #### Linear mapping mode

    The linear warpgroup id is obtained by linearizing the index of the warpgroup.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D "kNumWarpsPerGroup * kWarpSize"-aligned subset of the
    (linearized) block.

    For instance, if the basis is denoted as (BX, BY, BZ) and the thread id is
    denoted by (tx, ty, tz), the linear warpgroup id is:

    ```
    linear_id = (tx + ty * BX + tz * BX * BY)
                / (kNumWarpsPerGroup * kWarpSize)
    ```

    The linear warpgroup id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LWGD0, LWGD1, LWGD2, LWGD3) the
    warpgroup id in the new basis is:

    ```
    (linear_id mod LWGD0,
     (linear_id / LWGD0) mod LWGD1,
     (linear_id / (LWGD0 * LWGD1)) mod LWGD2,
     (linear_id / (LWGD0 * LWGD1 * LWGD2)) mod LWGD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}

def GPUWarpMappingAttr
    : GPU_Attr<"GPUWarpMapping", "warp", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$warp
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining warp parallelism for GPU devices.

    Threads of proper granularity (e.g. multiple of "warp size" on CUDA devices)
    can be grouped into warps described by a 3-dimensional rectangle.
    This attribute indicates that warp parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D warp id is simply the adjusted 3D index of the thread
    `(tidx / kWarpSize, tidy, tidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the warps.

    #### Linear mapping mode

    The linear warp id is obtained by linearizing the index of the warp.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D "kWarpSize"-aligned subset of the (linearized) block.

    For instance, if the basis is denoted as (BX, BY, BZ) and the thread id is
    denoted by (tx, ty, tz), the linear warp id is:
    `linear_id = (tx + ty * BX + tz * BX * BY) / kWarpSize`.
    The linear warp id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LWD0, LWD1, LWD2, LWD3) the
    warp id in the new basis is:

    ```
    (linear_id mod LWD0,
     (linear_id / LWD0) mod LWD1,
     (linear_id / (LWD0 * LWD1)) mod LWD2,
     (linear_id / (LWD0 * LWD1 * LWD2)) mod LWD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}

def GPUThreadMappingAttr
    : GPU_Attr<"GPUThreadMapping", "thread", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$thread
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining thread parallelism for GPU devices.

    Threads (aka work items) are grouped into a thread block described by a
    3-dimensional rectangle.
    This attribute indicates that thread parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D thread id is simply the 3D index of the thread `(tidx, tidy, tidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the block.

    #### Linear mapping mode

    The linear thread id is obtained by linearizing the index of the thread.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D subset of the (linearized) block.

    For instance, if the basis is denoted as (BX, BY, BZ) and the thread id is
    denoted by (tx, ty, tz), the linear thread id is:
    `linear_id = tx + ty * BX + tz * BX * BY`.
    The linear thread id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LTD0, LTD1, LTD2, LTD3) the
    thread id in the new basis is:

    ```
    (linear_id mod LTD0,
     (linear_id / LTD0) mod LTD1,
     (linear_id / (LTD0 * LTD1)) mod LTD2,
     (linear_id / (LTD0 * LTD1 * LTD2)) mod LTD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}

def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [
  DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ] > {
  let parameters = (ins
    EnumParameter<GPU_AddressSpaceEnum>:$address_space
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining memory hierarchy for GPU devices.

    GPU memory has three memory spaces: global, workgroup, and private. The
    global memory is visible to all workitems and workgroups, the workgroup
    memory is only available to workitems within a workgroup, and private
    memory is only visible to a single workitem. This attribute indicates
    that using the memory hierarchy is desired. It can be consumed by lowering
    to move data to a specific address space in GPU code.
  }];
}

#endif // GPU_DEVICE_MAPPING_ATTR