xref: /llvm-project/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td (revision 5207632f8698a2fab0c4cdcdf2f7ad9aaf96e06f)
1//===-- GPUDeviceMappingAttr.td - Attribute definition -----*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Defines the attributes used to map loops and memory spaces to GPU devices.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef GPU_DEVICE_MAPPING_ATTR
14#define GPU_DEVICE_MAPPING_ATTR
15
16include "mlir/Dialect/GPU/IR/GPUBase.td"
17include "mlir/IR/EnumAttr.td"
18include "mlir/Dialect/SCF/IR/DeviceMappingInterface.td"
19
// Mapping ids used in 3D mapping mode: one case per axis, values 0 .. 2.
def DimX : I64EnumAttrCase<"DimX", 0, "x">;
def DimY : I64EnumAttrCase<"DimY", 1, "y">;
def DimZ : I64EnumAttrCase<"DimZ", 2, "z">;

// Mapping ids used in linear mapping mode: LinearDim0 .. LinearDim9.
// Their values continue right after the three ids above (3 .. 12) and their
// mnemonics are "linear_dim_0" .. "linear_dim_9".
foreach i = 0...9 in
  def LinearDim#i
      : I64EnumAttrCase<"LinearDim"#i, !add(i, 3), "linear_dim_"#i>;
33
// TODO: Grid mapping ids and linear mapping ids would be better modeled as two
// separate enums. An optional EnumParameter is not possible yet, so both modes
// currently share this single enum.
def MappingIdEnum : I64EnumAttr<
    "MappingId", "Mapping ids for loop mapping",
    [
      // 3D mapping mode.
      DimX, DimY, DimZ,
      // Linear mapping mode.
      LinearDim0, LinearDim1, LinearDim2, LinearDim3, LinearDim4,
      LinearDim5, LinearDim6, LinearDim7, LinearDim8, LinearDim9
    ]> {
  let cppNamespace = "::mlir::gpu";
}
43
// Device mapping attribute requesting thread block parallelism. Its single
// parameter selects either a 3D mapping id or a linear mapping id.
def GPUBlockMappingAttr : GPU_Attr<"GPUBlockMapping", "block", [
  DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$block
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining thread block parallelism for GPU devices.

    Thread blocks (aka workgroups) are grouped into a grid described by a
    3-dimensional rectangle.
    This attribute indicates that thread block parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D block id is simply the 3D index of the block `(bidx, bidy, bidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the grid.

    #### Linear mapping mode

    The linear block id is obtained by linearizing the index of the block.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D subset of the (linearized) grid.

    For instance, if the basis is denoted as (GX, GY, GZ) and the block id is
    denoted by (bx, by, bz), the linear block id is:

    ```
    linear_id = bx + by * GX + bz * GX * GY
    ```

    The linear block id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LBD0, LBD1, LBD2, LBD3) the
    block id in the new basis is:

    ```
    (linear_id mod LBD0,
     (linear_id / LBD0) mod LBD1,
     (linear_id / (LBD0 * LBD1)) mod LBD2,
     (linear_id / (LBD0 * LBD1 * LBD2)) mod LBD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}
88
// Device mapping attribute requesting warpgroup parallelism. Its single
// parameter selects either a 3D mapping id or a linear mapping id.
def GPUWarpgroupMappingAttr
    : GPU_Attr<"GPUWarpgroupMapping", "warpgroup", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$warpgroup
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining warpgroup parallelism for GPU devices.

    Threads of proper granularity (e.g. multiple of
    "kNumWarpsPerGroup * kWarpSize" on CUDA devices) can be grouped into
    warpgroups described by a 3-dimensional rectangle.
    This attribute indicates that warpgroup parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D warpgroup id is simply the adjusted 3D index of the thread
    `(tidx / (kNumWarpsPerGroup * kWarpSize), tidy, tidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the warpgroups.

    #### Linear mapping mode

    The linear warpgroup id is obtained by linearizing the index of the warpgroup.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D "kNumWarpsPerGroup * kWarpSize"-aligned subset of the
    (linearized) block.

    For instance, if the basis is denoted as (BX, BY, BZ) and the thread id is
    denoted by (tx, ty, tz), the linear warpgroup id is:

    ```
    linear_id = (tx + ty * BX + tz * BX * BY)
                / (kNumWarpsPerGroup * kWarpSize)
    ```

    The linear warpgroup id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LWGD0, LWGD1, LWGD2, LWGD3) the
    warpgroup id in the new basis is:

    ```
    (linear_id mod LWGD0,
     (linear_id / LWGD0) mod LWGD1,
     (linear_id / (LWGD0 * LWGD1)) mod LWGD2,
     (linear_id / (LWGD0 * LWGD1 * LWGD2)) mod LWGD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}
138
// Device mapping attribute requesting warp parallelism. Its single parameter
// selects either a 3D mapping id or a linear mapping id.
def GPUWarpMappingAttr
    : GPU_Attr<"GPUWarpMapping", "warp", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$warp
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining warp parallelism for GPU devices.

    Threads of proper granularity (e.g. multiple of "warp size" on CUDA devices)
    can be grouped into warps described by a 3-dimensional rectangle.
    This attribute indicates that warp parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D warp id is simply the adjusted 3D index of the thread
    `(tidx / kWarpSize, tidy, tidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the warps.

    #### Linear mapping mode

    The linear warp id is obtained by linearizing the index of the warp.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D "kWarpSize"-aligned subset of the (linearized) block.

    For instance, if the basis is denoted as (BX, BY, BZ) and the thread id is
    denoted by (tx, ty, tz), the linear warp id is:

    ```
    linear_id = (tx + ty * BX + tz * BX * BY) / kWarpSize
    ```

    The linear warp id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LWD0, LWD1, LWD2, LWD3) the
    warp id in the new basis is:

    ```
    (linear_id mod LWD0,
     (linear_id / LWD0) mod LWD1,
     (linear_id / (LWD0 * LWD1)) mod LWD2,
     (linear_id / (LWD0 * LWD1 * LWD2)) mod LWD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}
185
// Device mapping attribute requesting thread parallelism. Its single parameter
// selects either a 3D mapping id or a linear mapping id.
def GPUThreadMappingAttr
    : GPU_Attr<"GPUThreadMapping", "thread", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<MappingIdEnum>:$thread
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining thread parallelism for GPU devices.

    Threads (aka work items) are grouped into thread blocks described by a
    3-dimensional rectangle.
    This attribute indicates that thread parallelism is desired.
    It can be consumed by lowering to generate GPU code.
    2 modes are supported: (1) 3D mapping mode and (2) linear mapping mode.

    #### 3D mapping mode

    The 3D thread id is simply the 3D index of the thread `(tidx, tidy, tidz)`.
    If required, predication occurs on a per-dimension basis. This allows
    specifying predication on a 3D sub-rectangle of the block.

    #### Linear mapping mode

    The linear thread id is obtained by linearizing the index of the thread.
    If required, predication occurs on the linear id. This allows specifying
    predication on a 1D subset of the (linearized) block.

    For instance, if the basis is denoted as (BX, BY, BZ) and the thread id is
    denoted by (tx, ty, tz), the linear thread id is:

    ```
    linear_id = tx + ty * BX + tz * BX * BY
    ```

    The linear thread id is fixed for the duration of a GPU kernel.

    This linear id mapping attribute indicates a different linearization relation
    is applied locally to a loop nest.

    For instance, if the new basis is denoted as (LTD0, LTD1, LTD2, LTD3) the
    thread id in the new basis is:

    ```
    (linear_id mod LTD0,
     (linear_id / LTD0) mod LTD1,
     (linear_id / (LTD0 * LTD1)) mod LTD2,
     (linear_id / (LTD0 * LTD1 * LTD2)) mod LTD3)
    ```

    This reinterpretation is only fixed for the duration of a loop nest.
  }];
}
230
// Device mapping attribute selecting a GPU address space. Its single parameter
// is a GPU_AddressSpaceEnum value.
def GPUMemorySpaceMappingAttr
    : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [
      DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
  let parameters = (ins
    EnumParameter<GPU_AddressSpaceEnum>:$address_space
  );
  let assemblyFormat = "`<` params `>`";
  let description = [{
    An attribute that allows defining memory hierarchy for GPU devices.

    GPU memory has three memory spaces: global, workgroup, and private. The
    global memory is visible to all workitems and workgroups, the workgroup
    memory is only available for workitems within a workgroup, and private
    memory is only visible to a single workitem. This attribute indicates
    that using memory hierarchy is desired. It can be consumed by lowering to
    move data to a specific address space in GPU code.
  }];
}
247
248#endif // GPU_DEVICE_MAPPING_ATTR
249