xref: /llvm-project/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td (revision 5a9bdd85ee4d8527e2cedf44f3ce26ff414f9b6a)
1//===- NVGPUTransformOps.td - NVGPU transform ops ----------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef NVGPU_TRANSFORM_OPS
10#define NVGPU_TRANSFORM_OPS
11
12include "mlir/Dialect/Transform/IR/TransformAttrs.td"
13include "mlir/Dialect/Transform/IR/TransformDialect.td"
14include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td"
15include "mlir/Dialect/Transform/IR/TransformTypes.td"
16include "mlir/Interfaces/SideEffectInterfaces.td"
17
18//===----------------------------------------------------------------------===//
19// Apply...ConversionPatternsOp
20//===----------------------------------------------------------------------===//
21
22def ApplyNVGPUToNVVMConversionPatternsOp : Op<Transform_Dialect,
23    "apply_conversion_patterns.nvgpu.nvgpu_to_nvvm",
24    [DeclareOpInterfaceMethods<ConversionPatternDescriptorOpInterface,
25                               ["verifyTypeConverter"]>]> {
26  let description = [{
27    Collects patterns that convert NVGPU dialect ops to NVVM dialect ops. These
28    patterns require an "LLVMTypeConverter".
29  }];
30  let assemblyFormat = "attr-dict";
31}
32
33//===----------------------------------------------------------------------===//
34// CreateAsyncGroupsOp
35//===----------------------------------------------------------------------===//
36
37def CreateAsyncGroupsOp :
38  Op<Transform_Dialect, "nvgpu.create_async_groups",
39    [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
40     TransformEachOpTrait,
41     TransformOpInterface,
42     ReportTrackingListenerFailuresOpTrait]> {
43  let description = [{
44    Look for global to shared memory copies within the targeted op in the form
45    of vector transfer ops and convert them to async copies when possible.
46    Consecutive copies are put into the same group. A "wait" operation is
47    inserted right at the of end the group.
48
49    `bypass_l1` specifies whether `bypassL1` attributes should be added to
50    the async copies. `bypass_l1` is a compiler hint: only 16 byte transfers
51    can bypass the L1 cache, so this attribute is not set for any other transfer
52    sizes.
53
54    #### Return modes
55
56    This op consumes the `target` handle and produces the `result` handle, which
57    is mapped to the same payload operations as the `target` handle. The op
58    modifies the payload.
59  }];
60
61  let arguments = (ins TransformHandleTypeInterface:$target,
62                   UnitAttr:$bypass_l1);
63  let results = (outs TransformHandleTypeInterface:$result);
64
65  let assemblyFormat = [{
66    $target attr-dict `:` functional-type(operands, results)
67  }];
68
69  let extraClassDeclaration = [{
70    ::mlir::DiagnosedSilenceableFailure applyToOne(
71        ::mlir::transform::TransformRewriter &rewriter,
72        ::mlir::Operation *target,
73        ::mlir::transform::ApplyToEachResultList &results,
74        ::mlir::transform::TransformState &state);
75  }];
76}
77
78//===----------------------------------------------------------------------===//
79// PipelineSharedMemoryCopiesOp
80//===----------------------------------------------------------------------===//
81
82def PipelineSharedMemoryCopiesOp :
83  Op<Transform_Dialect, "nvgpu.pipeline_shared_memory_copies",
84    [FunctionalStyleTransformOpTrait,
85     MemoryEffectsOpInterface,
86     TransformEachOpTrait,
87     TransformOpInterface,
88     ReportTrackingListenerFailuresOpTrait]> {
89  let summary =
90    "Applies software pipelining to a given loop with shared memory copies";
91
92  let description = [{
93    Applies software pipelining to a given scf.for loop. The pipelining
94    strategy will look for a load into shared memory and pipeline it to overlap
95    it with the rest of the loop.
96
97    NOTE: It is user responsibility to ensure that there are no dependency
98    between `depth` iterations of the loop by using multi-buffering. It is
99    also user responsibility to ensure a sufficient amount of shared memory
100    is allocated to cover eventual writes by `depth-1` speculative
101    iterations.
102
103    `depth` will indicate how many stages the software pipeline should have.
104    `peel_epilogue` allows to force the epilogue to be peeled out instead of
105    potentially using predicated operations for the epilogue phase.
106
107    #### Return modes
108
109    Consumes the operand handle and produces a result handle pointing to the
110    loop, which may or may not have been pipelined. Produces a definite failure
111    if the loop pipeliner mutated the IR before failing to pipeline, in
112    particular if `peel_epilogue` is not set and the loop body doesn't support
113    predication. If failure propagation mode is set to "propagate", produces a
114    silenceable failure when pipelining preconditions, e.g., loop bound being
115    static, are not met or when the loop wasn't pipelined because due to the
116    lack of loads into shared memory. If the failure propagation mode is set
117    to "suppress" (default), succeeds in these case and associates the result
118    handle with the original loop.
119
120    TODO: the shared memory part and behavior specific to NVGPU should be
121    made orthogonal to pipelining so that `transform.loop.pipeline` becomes
122    usable here.
123  }];
124
125  let arguments = (ins TransformHandleTypeInterface:$for_op,
126                   I64Attr:$depth,
127                   UnitAttr:$peel_epilogue,
128                   DefaultValuedAttr<FailurePropagationMode,
129                      "::mlir::transform::FailurePropagationMode::Suppress">
130                     :$failure_propagation_mode);
131  let results = (outs TransformHandleTypeInterface:$result);
132
133  let assemblyFormat = [{
134    `failures` `(` $failure_propagation_mode `)`
135    $for_op
136    attr-dict
137    `:` functional-type(operands, results)
138  }];
139
140  let extraClassDeclaration = [{
141    ::mlir::DiagnosedSilenceableFailure applyToOne(
142        ::mlir::transform::TransformRewriter &rewriter,
143        ::mlir::scf::ForOp forOp,
144        ::mlir::transform::ApplyToEachResultList &results,
145        ::mlir::transform::TransformState &state);
146  }];
147}
148
149//===----------------------------------------------------------------------===//
150// RewriteMatmulAsMmaSyncOp
151//===----------------------------------------------------------------------===//
152
153def RewriteMatmulAsMmaSyncOp :
154  Op<Transform_Dialect, "nvgpu.rewrite_matmul_as_mma_sync",
155    [FunctionalStyleTransformOpTrait,
156     MemoryEffectsOpInterface,
157     TransformEachOpTrait,
158     TransformOpInterface,
159     ReportTrackingListenerFailuresOpTrait]> {
160  let description = [{
161    Rewrite a matmul operation on memref to an mma.sync operation on vectors.
162
163    Memory copies with the required access patterns are automatically inserted.
164    Operations that do not have a 1-1 mapping to mma.sync operations are left
165    unchanged.
166  }];
167
168  let arguments = (ins TransformHandleTypeInterface:$target);
169  let results = (outs);
170
171  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) ";
172
173  let extraClassDeclaration = [{
174    ::mlir::DiagnosedSilenceableFailure applyToOne(
175        ::mlir::transform::TransformRewriter &rewriter,
176        ::mlir::linalg::LinalgOp linalgOp,
177        ::mlir::transform::ApplyToEachResultList &results,
178        ::mlir::transform::TransformState &state);
179  }];
180}
181
182//===----------------------------------------------------------------------===//
183// RewriteCopyAsTmaOp
184//===----------------------------------------------------------------------===//
185
186def RewriteCopyAsTmaOp :
187  Op<Transform_Dialect, "nvgpu.rewrite_copy_as_tma",
188    [FunctionalStyleTransformOpTrait,
189     MemoryEffectsOpInterface,
190     TransformEachOpTrait,
191     TransformOpInterface,
192     ReportTrackingListenerFailuresOpTrait]> {
193  let description = [{
194    Rewrite a copy operation on memref to tma operations that transit through
195    shared memory.
196  }];
197
198  let arguments = (ins TransformHandleTypeInterface:$target);
199  let results = (outs);
200
201  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) ";
202
203  let extraClassDeclaration = [{
204    ::mlir::DiagnosedSilenceableFailure apply(
205        ::mlir::transform::TransformRewriter &rewriter,
206        ::mlir::transform::TransformResults &transformResults,
207        ::mlir::transform::TransformState &state);
208  }];
209}
210
211#endif // NVGPU_TRANSFORM_OPS
212