1//===- LinalgTransformOps.td - Linalg transform ops --------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LINALG_TRANSFORM_OPS
10#define LINALG_TRANSFORM_OPS
11
12include "mlir/Dialect/Linalg/TransformOps/LinalgTransformEnums.td"
13include "mlir/Dialect/Transform/IR/TransformAttrs.td"
14include "mlir/Dialect/Transform/IR/TransformDialect.td"
15include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td"
16include "mlir/Dialect/Transform/IR/TransformTypes.td"
17include "mlir/Dialect/SCF/IR/DeviceMappingInterface.td"
18include "mlir/Interfaces/SideEffectInterfaces.td"
19include "mlir/IR/OpBase.td"
20include "mlir/IR/RegionKindInterface.td"
21
22// This is roughly similar to OpFoldResult assuming the handle produces a single
23// value in the payload IR.
24def TransformAnyParamTypeOrAnyHandle : Type<
25    Or<[TransformHandleTypeInterface.predicate,
26        TransformParamTypeInterface.predicate]>,
27    "transform any param type or any handle type">;
28
29//===----------------------------------------------------------------------===//
30// Apply...PatternsOp
31//===----------------------------------------------------------------------===//
32
33def ApplyEraseUnnecessaryInputsPatternsOp : Op<Transform_Dialect,
34    "apply_patterns.linalg.erase_unnecessary_inputs",
35    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
36  let description = [{
37    Collects patterns that promote inputs to outputs and remove unused inputs of
38    `linalg.generic` ops.
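
    For illustration, this pattern descriptor (like the other
    `apply_patterns.linalg.*` ops below) is meant to be nested inside a
    `transform.apply_patterns` op; a minimal sketch, with `%func` as a
    placeholder handle:

    ```mlir
    transform.apply_patterns to %func {
      transform.apply_patterns.linalg.erase_unnecessary_inputs
    } : !transform.any_op
    ```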
39  }];
40
41  let assemblyFormat = "attr-dict";
42}
43
44def ApplyDecomposeTensorPackUnpackPatternsOp
45    : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pack_unpack",
46         [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
47  let description = [{
48    Collects patterns to decompose tensor.pack and tensor.unpack into simpler ops
49    such as tensor::PadOp and linalg::TransposeOp. Requires all outer dims to be unit.
50  }];
51
52  let assemblyFormat = "attr-dict";
53}
54
55def ApplyDecomposeTensorPadPatternsOp
56    : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pad",
57         [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
58  let description = [{
59    Collects patterns to decompose tensor.pad into simpler ops such as
60    tensor::EmptyOp, linalg::FillOp and tensor::InsertSliceOp.
61  }];
62
63  let assemblyFormat = "attr-dict";
64}
65
66def ApplyFoldUnitExtentDimsViaReshapesPatternsOp : Op<Transform_Dialect,
67    "apply_patterns.linalg.fold_unit_extent_dims_via_reshapes",
68    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
69  let description = [{
70    Collects patterns to fold unit-extent dimensions in operands/results of
71    linalg ops on tensors via reassociative reshape ops.
72  }];
73
74  let assemblyFormat = "attr-dict";
75}
76
77def ApplyFoldUnitExtentDimsViaSlicesPatternsOp : Op<Transform_Dialect,
78    "apply_patterns.linalg.fold_unit_extent_dims_via_slices",
79    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
80  let description = [{
81    Collects patterns to fold unit-extent dimensions in operands/results of
82    linalg ops on tensors via rank-reducing slices.
83  }];
84
85  let assemblyFormat = "attr-dict";
86}
87
88def ApplyTilingCanonicalizationPatternsOp : Op<Transform_Dialect,
89    "apply_patterns.linalg.tiling_canonicalization",
90    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
91  let description = [{
92    Collects canonicalization patterns relevant to apply after tiling patterns.
93  }];
94
95  let assemblyFormat = "attr-dict";
96}
97
98def ApplyFoldAddIntoDestPatternsOp : Op<Transform_Dialect,
99    "apply_patterns.linalg.fold_add_into_dest",
100    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
101  let description = [{
102    Collects patterns to replace linalg.add when destination passing suffices
103    for achieving the sum.
104  }];
105
106  let assemblyFormat = "attr-dict";
107}
108
109def ApplyPadVectorizationPatternsOp : Op<Transform_Dialect,
110    "apply_patterns.linalg.pad_vectorization",
111    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
112  let description = [{
113    Apply patterns that vectorize tensor.pad.
114
115    These patterns rewrite tensor.pad Ops using vector.transfer_read and
116    vector.transfer_write operations. This is done either by:
117      1. Folding tensor.pad with an existing vector.transfer_read /
118      vector.transfer_write Op (generated prior to running these patterns).
119      2. Rewriting it (when matched together with a tensor.insert_slice
120      consumer Op) as a vector.transfer_read + vector.transfer_write pair.
121
122    In both cases, these patterns look at producers and consumers for the
123    matched tensor.pad Op to find opportunities for vectorization.
124  }];
125
126  let assemblyFormat = "attr-dict";
127}
128
129//===----------------------------------------------------------------------===//
130// BufferizeToAllocationOp
131//===----------------------------------------------------------------------===//
132
133def BufferizeToAllocationOp : Op<Transform_Dialect,
134    "structured.bufferize_to_allocation",
135    [DeclareOpInterfaceMethods<TransformOpInterface>,
136     DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
137     ReportTrackingListenerFailuresOpTrait]> {
138  let description = [{
139    This transform bufferizes the targeted operation and materializes the
140    result in a new allocation. It replaces all original uses of the target
141    result with the newly allocated buffer, wrapped in a
142    `bufferization.to_tensor` op. It returns a handle to the newly allocated
143    buffer. Furthermore, it returns a handle that is mapped to all newly created
144    ops.
145
146    Only bufferizable ops that bufferize to a memory write or have an
147    aliasing OpOperand (and do not themselves bufferize to an allocation) are
148    supported. They are bufferized using their BufferizableOpInterface
149    implementation. E.g.:
150
151    ```
152    %0 = tensor.insert %f into %dest[%pos] : tensor<10xf32>
153    ```
154
155    Is bufferized to:
156
157    ```
158    %alloc = memref.alloc() : memref<10xf32>
159    bufferization.materialize_in_destination %dest in %alloc
160    memref.store %f, %alloc[%pos] : memref<10xf32>
161    %0 = bufferization.to_tensor %alloc restrict writable : memref<10xf32>
162    ```
163
164    Selected ops that bufferize to an allocation (or need special handling) are
165    also supported:
166    - `tensor.pad` is lowered to an allocation, followed by a `linalg.fill`
167      and a buffer copy (all on memrefs).
168    - `vector.mask` is bufferized together with its region. The allocation is
169      placed in front of the `vector.mask` op.
170
171    An optional memory space attribute can be specified for the materialized
172    buffer allocation.
173
174    If a memory copy is needed, a "bufferization.materialize_in_destination" is
175    used when possible. This is an op with tensor semantics that will bufferize
176    to a memory copy later. Which concrete op will be used for the memory copy
177    is up to the bufferization framework. Alternatively, a custom memcpy op can
178    be specified via `memcpy_op`. Currently supported are "memref.copy" and
179    "linalg.copy". In that case, the source of each memcpy must not have a
180    custom memory space. Furthermore, because the future buffer layout is unknown
181    for a given tensor, a fully dynamic layout is assumed for best
182    compatibility. Users should use "bufferization.materialize_in_destination"
183    when possible.
184
185    "memref.alloc" is used for new buffer allocations. The buffer is deallocated
186    at the end of the block if the "emit_dealloc" attribute is present. If this
187    attribute is not present, the allocated memory will be leaked. However,
188    running the `-buffer-deallocation-pipeline` after all bufferization is done
189    will properly insert the corresponding deallocation(s). Custom allocation
190    ops can be specified via `alloc_op`. Currently supported are "memref.alloc"
191    and "memref.alloca". In case of a "memref.alloca", the buffer is not
192    deallocated.
193
194    If `bufferize_destination_only` is set, only the destination operands of the
195    op are bufferized to a new memory allocation, but not the op itself.
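
    For illustration, a possible invocation from a transform script (the handle
    name and attribute values are placeholders):

    ```mlir
    %buffer, %new_ops = transform.structured.bufferize_to_allocation %target
        {memory_space = 3, emit_dealloc} : !transform.any_op
    ```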
196
197    #### Return modes
198
199    This operation consumes the `target` handle and produces the
200    `allocated_buffer` and `new_ops` handles. It always succeeds.
201  }];
202
203  let arguments = (ins TransformHandleTypeInterface:$target,
204                       OptionalAttr<AnyAttr>:$memory_space,
205                       DefaultValuedAttr<StrAttr,
206                              "\"bufferization.materialize_in_destination\"">:
207                           $memcpy_op,
208                       DefaultValuedAttr<StrAttr, "\"memref.alloc\"">:
209                           $alloc_op,
210                       UnitAttr:$bufferize_destination_only,
211                       UnitAttr:$emit_dealloc);
212  let results = (outs Transform_AnyValue:$allocated_buffer,
213                      Transform_AnyOpType:$new_ops);
214  let assemblyFormat = "$target attr-dict `:` type($target)";
215  let hasVerifier = 1;
216
217  let builders = [
218    OpBuilder<(ins "Value":$target, "Attribute":$memorySpace)>,
219    OpBuilder<(ins "Value":$target, "int64_t":$memorySpace)>
220  ];
221}
222
223//===----------------------------------------------------------------------===//
224// DecomposeOp
225//===----------------------------------------------------------------------===//
226
227def DecomposeOp : Op<Transform_Dialect, "structured.decompose",
228    [FunctionalStyleTransformOpTrait,
229     MemoryEffectsOpInterface,
230     TransformOpInterface,
231     TransformEachOpTrait,
232     ReportTrackingListenerFailuresOpTrait]> {
233  let description = [{
234    Decomposes named complex operations, such as higher-dimensional
235    (depthwise) convolutions, into combinations of lower-dimensional equivalents
236    when possible.
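
    For illustration, a minimal usage sketch (the `%conv` handle is a
    placeholder for a previously matched convolution op):

    ```mlir
    %lower_d = transform.structured.decompose %conv
        : (!transform.any_op) -> !transform.any_op
    ```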
237
238    #### Return modes
239
240    This operation ignores non-Linalg ops and drops them in the return.
241    If all the operations referred to by the `target` handle decompose
242    properly, the transform succeeds. Otherwise the transform produces a
243    silenceable failure. The return handle points to only the subset of
244    successfully produced computational operations, which can be empty.
245  }];
246
247  let arguments = (ins TransformHandleTypeInterface:$target);
248  let results = (outs TransformHandleTypeInterface:$transformed);
249  let assemblyFormat =
250      "$target attr-dict `:` functional-type(operands, results)";
251
252  let extraClassDeclaration = [{
253    ::mlir::DiagnosedSilenceableFailure applyToOne(
254        ::mlir::transform::TransformRewriter &rewriter,
255        ::mlir::linalg::LinalgOp target,
256        ::mlir::transform::ApplyToEachResultList &results,
257        ::mlir::transform::TransformState &state);
258  }];
259}
260
261//===----------------------------------------------------------------------===//
262// EliminateLinalgOpAnchoredEmptyTensorsOp
263//===----------------------------------------------------------------------===//
264
265def EliminateLinalgOpAnchoredEmptyTensorsOp
266    : Op<Transform_Dialect, "structured.eliminate_empty_tensors",
267        [DeclareOpInterfaceMethods<TransformOpInterface>,
268         DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
269  let description = [{
270    Try to eliminate all `tensor.empty` op uses that are anchored on a LinalgOp
271    within the targeted op.
272
273    This op is similar to `bufferization.eliminate_empty_tensors`, but specific
274    to LinalgOps.
275
276    `tensor.empty` ops cannot be bufferized. They can either be converted to
277    `bufferization.alloc_tensor` or replaced with another tensor (via this
278    transform). `tensor.empty` does not specify the contents of the returned
279    tensor, so its result can be replaced with an arbitrary tensor value as long
280    as the dimensions match.
281
282    This transform looks for `tensor.empty` ops where the SSA use-def chain of
283    the result ends in a supported LinalgOp (always following the aliasing
284    OpOperand/OpResult chain). The following LinalgOps are supported:
285    - Only parallel iterator types.
286    - The use-def chain ends in an input operand of the LinalgOp.
287    - The LinalgOp has an unused output operand with the same shape and
288      indexing map.
289
290    Example:
291
292    ```
293    %0 = tensor.empty()
294    %1 = linalg.matmul ins(...) outs(%0)
295    %2 = linalg.generic ins(%1) outs(%dest) {
296      ^bb0(%in: f32, %out: f32):
297      // out not used
298    }
299    ```
300
301    Is rewritten with:
302    ```
303    %0 = tensor.empty()
304    %1 = linalg.matmul ins(...) outs(%dest)
305    %2 = linalg.generic ins(%0) outs(%1) {
306      ^bb0(%in: f32, %out: f32):
307      // Use %out instead of %in
308    }
309    ```
310
311    After this transformation, the "ins" operand has no uses inside the body of
312    the LinalgOp and can be folded away with existing cleanup patterns.
313    Afterwards, the tensor::EmptyOp can also fold away, so that the example can
314    bufferize without an allocation (in the absence of other conflicts).
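
    For illustration, a minimal usage sketch (with `%func` as a placeholder
    handle to the targeted op):

    ```mlir
    transform.structured.eliminate_empty_tensors %func : !transform.any_op
    ```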
315
316    #### Return modes
317
318    This transform reads the target handle and modifies the payload. It does
319    not produce any handle.
320  }];
321
322  let arguments = (ins TransformHandleTypeInterface:$target);
323
324  let results = (outs);
325
326  let assemblyFormat = "$target attr-dict `:` type($target)";
327}
328
329//===----------------------------------------------------------------------===//
330// FuseOp
331//===----------------------------------------------------------------------===//
332
333def FuseOp : Op<Transform_Dialect, "structured.fuse",
334    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
335     DeclareOpInterfaceMethods<TransformOpInterface>,
336     ReportTrackingListenerFailuresOpTrait]> {
337  let description = [{
338    Tiles the operations pointed to by the target handle and fuses their
339    producers greedily using the options provided as attributes.
340
341    If `apply_cleanup` is true then slice canonicalization is applied between
342    fusion steps.
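
    For illustration, a possible usage sketch that tiles a matmul by [4, 8],
    fuses its producers, and returns the two generated loops (handle names are
    placeholders):

    ```mlir
    %fused, %loop0, %loop1 = transform.structured.fuse %matmul [4, 8]
        apply_cleanup = true
        : (!transform.any_op)
       -> (!transform.any_op, !transform.any_op, !transform.any_op)
    ```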
343  }];
344
345  let arguments =
346    (ins TransformHandleTypeInterface:$target,
347         DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_sizes,
348         DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_interchange,
349         DefaultValuedAttr<BoolAttr, "false">:$apply_cleanup);
350  let results = (outs TransformHandleTypeInterface:$transformed,
351                      Variadic<TransformHandleTypeInterface>:$loops);
352
353  let assemblyFormat = [{
354    $target ($tile_sizes^)? (`interchange` $tile_interchange^)?
355    (`apply_cleanup` `=` $apply_cleanup^)? attr-dict
356    `:` functional-type(operands, results)
357  }];
358  let hasVerifier = 1;
359}
360
361//===----------------------------------------------------------------------===//
362// FuseIntoContainingOp
363//===----------------------------------------------------------------------===//
364
365def FuseIntoContainingOp :
366    Op<Transform_Dialect, "structured.fuse_into_containing_op",
367      [DeclareOpInterfaceMethods<TransformOpInterface,
368          ["allowsRepeatedHandleOperands"]>,
369       DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
370       ReportTrackingListenerFailuresOpTrait]> {
371  let summary = "Fuse a producer into a containing operation.";
372
373  let description = [{
374    Fuses the `producer_op` into the `containing_op`.
375    Returns a handle to the fused ops and the `new_containing_op`.
376
377    The producer is typically a slice of a tileable op (i.e., implements
378    TilingInterface). In that case, this transform computes the accessed
379    producer slice inside of the containing op ("tile and fuse") and if required,
380    creates a new containing op with outputs from the fused producer. Otherwise,
381    the entire producer is cloned inside the containing op ("clone and fuse").
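
    For illustration, a minimal usage sketch (the `%producer` and `%forall`
    handles are placeholders, e.g. produced by `transform.structured.match` and
    a previous tiling step):

    ```mlir
    %fused, %new_forall = transform.structured.fuse_into_containing_op
        %producer into %forall
        : (!transform.any_op, !transform.any_op)
       -> (!transform.any_op, !transform.any_op)
    ```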
382
383    The containing op handle must be associated with exactly one payload op. The
384    producer op handle may be associated with multiple payload ops. This
385    transform fuses producers one-by-one, always picking an unspecified producer
386    among those that have at least one use inside the containing op. A producer
387    can be listed multiple times in the handle.
388
389    Note: If a producer has multiple uses inside the containing op, it is
390    currently tiled and/or cloned multiple times into the containing op.
391    TODO: Reuse already fused OpResults instead of tiling/cloning a second time
392    when possible. Fuse producers according to a topological sorting to achieve
393    the largest amount of reuse.
394
395    #### Return modes
396
397    If at least one producer could not be fused, this operation produces a
398    silenceable failure.  This is the case when tiling fails or when no
399    producer op could be found among the remaining producers that has at least
400    one use within the containing op. I.e., "producers" that are not consumed
401    within the containing op are rejected by this operation.
402
403    This operation consumes the producer handle.
404    This operation only reads the containing op handle.
405  }];
406
407  let arguments = (ins TransformHandleTypeInterface:$producer_op,
408                       TransformHandleTypeInterface:$containing_op);
409  let results = (outs TransformHandleTypeInterface:$fused_op,
410                      TransformHandleTypeInterface:$new_containing_op);
411  let assemblyFormat = "$producer_op `into` $containing_op attr-dict "
412                       " `:` functional-type(operands, results)";
413
414  let builders = [
415    OpBuilder<(ins "Value":$producerOp, "Value":$containingOp)>
416  ];
417}
418
419//===----------------------------------------------------------------------===//
420// GeneralizeOp
421//===----------------------------------------------------------------------===//
422
423def GeneralizeOp : Op<Transform_Dialect, "structured.generalize",
424    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
425     TransformOpInterface, TransformEachOpTrait,
426     ReportTrackingListenerFailuresOpTrait]> {
427  let description = [{
428    Transforms a named structured operation into the generic form with the
429    explicit attached region.
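
    For illustration, a minimal usage sketch (the `%matmul` handle is a
    placeholder):

    ```mlir
    %generic = transform.structured.generalize %matmul
        : (!transform.any_op) -> !transform.any_op
    ```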
430
431    #### Return modes
432
433    This operation ignores non-Linalg ops and drops them in the return.
434    If all the operations referred to by the `target` handle generalize
435    properly, the transform succeeds. Otherwise the transform produces a
436    silenceable failure.  The return handle points to only the subset of
437    successfully produced equivalent generic operations, which can be empty or
438    contain the original ops if they were already in generic form.
439  }];
440
441  let arguments = (ins TransformHandleTypeInterface:$target);
442  let results = (outs TransformHandleTypeInterface:$transformed);
443  let assemblyFormat = [{
444      $target attr-dict `:`
445      custom<SemiFunctionType>(type($target), type($transformed), "false")
446  }];
447
448  let extraClassDeclaration = [{
449    ::mlir::DiagnosedSilenceableFailure applyToOne(
450        ::mlir::transform::TransformRewriter &rewriter,
451        ::mlir::linalg::LinalgOp target,
452        ::mlir::transform::ApplyToEachResultList &results,
453        ::mlir::transform::TransformState &state);
454  }];
455}
456
457//===----------------------------------------------------------------------===//
458// SpecializeOp
459//===----------------------------------------------------------------------===//
460
461def SpecializeOp : Op<Transform_Dialect, "structured.specialize",
462    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
463     TransformOpInterface, TransformEachOpTrait,
464     ReportTrackingListenerFailuresOpTrait]> {
465  let description = [{
466    Transforms a generic operation into the equivalent named form.
467
468    #### Return modes
469
470    This operation ignores non-Linalg ops and drops them in the return. If all
471    the operations referred to by the `target` handle specialize, the transform
472    succeeds; otherwise, the operation produces a silenceable failure.  The return
473    handle points to only the subset of successfully produced equivalent named
474    operations, which can be empty or contain the original ops if they were already
475    in named form. The supported specializations to named Linalg operations are:
476    - linalg.copy of any rank.
477  }];
478
479  let arguments = (ins TransformHandleTypeInterface:$target);
480  let results = (outs TransformHandleTypeInterface:$transformed);
481  let assemblyFormat = [{
482      $target attr-dict `:`
483      custom<SemiFunctionType>(type($target), type($transformed), "false")
484  }];
485
486  let extraClassDeclaration = [{
487    ::mlir::DiagnosedSilenceableFailure applyToOne(
488        ::mlir::transform::TransformRewriter &rewriter,
489        ::mlir::linalg::LinalgOp target,
490        ::mlir::transform::ApplyToEachResultList &results,
491        ::mlir::transform::TransformState &state);
492  }];
493}
494
495//===----------------------------------------------------------------------===//
496// InterchangeOp
497//===----------------------------------------------------------------------===//
498
499def InterchangeOp : Op<Transform_Dialect, "structured.interchange",
500    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
501    TransformOpInterface, TransformEachOpTrait,
502    ReportTrackingListenerFailuresOpTrait]> {
503  let description = [{
504    Interchanges the iterators of the operations pointed to by the target handle
505    using the iterator interchange attribute.
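
    For illustration, a minimal usage sketch that swaps the two iterators of a
    2-D `linalg.generic` (handle names are placeholders):

    ```mlir
    %transposed = transform.structured.interchange %generic
        iterator_interchange = [1, 0]
        : (!transform.any_op) -> !transform.any_op
    ```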
506
507    #### Return modes
508
509    This operation ignores non-linalg::Generic ops and drops them in the return.
510    This operation fails if the interchange attribute is invalid.
511    If all the operations referred to by the `target` handle interchange
512    properly, the transform succeeds.
513    If any interchange fails, the transform produces a definite failure.
514    The return handle points to only the subset of successfully produced
515    interchanged operations, which can be empty.
516  }];
517
518  let arguments =
519    (ins TransformHandleTypeInterface:$target,
520         ConfinedAttr<DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">,
521                      [DenseArrayNonNegative<DenseI64ArrayAttr>]>:$iterator_interchange);
522  let results = (outs TransformHandleTypeInterface:$transformed);
523
524  let assemblyFormat = [{
525    $target
526    (`iterator_interchange` `=` $iterator_interchange^)? attr-dict
527    `:` custom<SemiFunctionType>(type($target), type($transformed), "false")
528  }];
529  let hasVerifier = 1;
530
531  let extraClassDeclaration = [{
532    ::mlir::DiagnosedSilenceableFailure applyToOne(
533        ::mlir::transform::TransformRewriter &rewriter,
534        ::mlir::linalg::GenericOp target,
535        ::mlir::transform::ApplyToEachResultList &results,
536        ::mlir::transform::TransformState &state);
537  }];
538}
539
540//===----------------------------------------------------------------------===//
541// LowerPackOp
542//===----------------------------------------------------------------------===//
543def LowerPackOp : Op<Transform_Dialect, "structured.lower_pack", [
544                         FunctionalStyleTransformOpTrait,
545                         MemoryEffectsOpInterface,
546                         TransformEachOpTrait,
547                         TransformOpInterface,
548                         ReportTrackingListenerFailuresOpTrait]> {
549  let description = [{
550    Rewrite a tensor.pack into tensor.pad + tensor.expand_shape + linalg.transpose.
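
    For illustration, a minimal usage sketch (the `%pack` handle is a
    placeholder):

    ```mlir
    %pad, %expand, %transpose = transform.structured.lower_pack %pack
        : (!transform.op<"tensor.pack">)
       -> (!transform.op<"tensor.pad">,
           !transform.op<"tensor.expand_shape">,
           !transform.op<"linalg.transpose">)
    ```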
551
552    #### Return modes
553
554    This operation ignores non-pack ops and drops them in the return.
555    This operation produces a silenceable failure if the rewrite fails for any
556    reason.
557    If all the operations referred to by the `target` are rewritten, the
558    transform succeeds.
559    Return handles to the newly produced pad, expand_shape and transpose ops.
560  }];
561
562  let arguments = (ins Transform_ConcreteOpType<"tensor.pack">:$target,
563                       DefaultValuedAttr<BoolAttr, "true">:$lowerPadLikeWithInsertSlice);
564  let results = (outs Transform_ConcreteOpType<"tensor.pad">:$pad_op,
565                      Transform_ConcreteOpType<"tensor.expand_shape">:$expand_shape_op,
566                      Transform_ConcreteOpType<"linalg.transpose">:$transpose_op);
567  let assemblyFormat = [{
568    $target attr-dict `:` functional-type(operands, results)
569  }];
570
571  let extraClassDeclaration = [{
572    ::mlir::DiagnosedSilenceableFailure applyToOne(
573        ::mlir::transform::TransformRewriter &rewriter,
574        ::mlir::tensor::PackOp target,
575        ::mlir::transform::ApplyToEachResultList &transformResults,
576        ::mlir::transform::TransformState &state);
577  }];
578}
579
580//===----------------------------------------------------------------------===//
581// LowerUnPackOp
582//===----------------------------------------------------------------------===//
583def LowerUnPackOp : Op<Transform_Dialect, "structured.lower_unpack", [
584                         FunctionalStyleTransformOpTrait,
585                         MemoryEffectsOpInterface,
586                         TransformEachOpTrait,
587                         TransformOpInterface,
588                         ReportTrackingListenerFailuresOpTrait]> {
589  let description = [{
590    Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape +
591    tensor.extract_slice.
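
    For illustration, a minimal usage sketch (the `%unpack` handle is a
    placeholder):

    ```mlir
    %empty, %transpose, %collapse, %slice =
        transform.structured.lower_unpack %unpack
        : (!transform.op<"tensor.unpack">)
       -> (!transform.op<"tensor.empty">,
           !transform.op<"linalg.transpose">,
           !transform.op<"tensor.collapse_shape">,
           !transform.op<"tensor.extract_slice">)
    ```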
592
593    #### Return modes
594
595    This operation ignores non-unpack ops and drops them in the return.
596    This operation produces a silenceable failure if the rewrite fails for any
597    reason.
598    If all the operations referred to by the `target` are rewritten, the
599    transform succeeds.
600    Return handles to the newly produced empty, transpose, collapse_shape and extract_slice ops.
601  }];
602
603  let arguments = (ins Transform_ConcreteOpType<"tensor.unpack">:$target,
604                       DefaultValuedAttr<BoolAttr, "true">:$lowerUnpadLikeWithExtractSlice);
605  let results = (outs Transform_ConcreteOpType<"tensor.empty">:$empty_op,
606                      Transform_ConcreteOpType<"linalg.transpose">:$transpose_op,
607                      Transform_ConcreteOpType<"tensor.collapse_shape">:$collapse_shape_op,
608                      Transform_ConcreteOpType<"tensor.extract_slice">:$extract_slice_op);
609  let assemblyFormat = [{
610    $target attr-dict `:` functional-type(operands, results)
611  }];
612
613  let extraClassDeclaration = [{
614    ::mlir::DiagnosedSilenceableFailure applyToOne(
615        ::mlir::transform::TransformRewriter &rewriter,
616        ::mlir::tensor::UnPackOp target,
617        ::mlir::transform::ApplyToEachResultList &transformResults,
618        ::mlir::transform::TransformState &state);
619  }];
620}
621
622//===----------------------------------------------------------------------===//
623// MatchOp
624//===----------------------------------------------------------------------===//
625
626def MatchOp : Op<Transform_Dialect, "structured.match",
627    [MemoryEffectsOpInterface,
628     NavigationTransformOpTrait,
629     DeclareOpInterfaceMethods<TransformOpInterface>]> {
630  let description = [{
631    Match op with the specified constraints, within the target op.
632
633    The following constraints are supported:
634      - interface: an optional MatchInterfaceEnum specifying an enum
635        representation for an interface to target.
636      - ops: an optional StrArrayAttr specifying the concrete name of an op.
637        Multiple names can be specified. Matched ops must have one of specified
638        names.
639      - attribute: the matched op must have all specified attributes (with their
640        specified values).
641      - filter_result_type: the matched op must return exactly this one type.
642      - filter_operand_types: all the operands of the matched op must be of
643        this type. If more than one type is specified, then the length of the list
644        must be equal to the number of operands in the matched op, and the match
645        will succeed only if the operand types match all the types in the list
646        in the order in which they are specified.
647
648    Note: Only ops that satisfy all specified constraints are matched.
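
    For illustration, a common usage sketch that collects all `linalg.matmul`
    ops nested under a function handle (names are placeholders):

    ```mlir
    %matmuls = transform.structured.match ops{["linalg.matmul"]} in %func
        : (!transform.any_op) -> !transform.any_op
    ```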
649
650    TODO: Extend with regions to allow a limited form of constraints.
651
652    #### Return modes
653
654    This op traverses the ops nested under `target` and returns the handles to
655    all the operations that match the requirements.
656
657    This op fails if the target is not a handle to exactly one operation.
658    Otherwise it succeeds.
659
660    This operation does not consume the target handle and produces new handles:
661    it is a navigation op.
662  }];
663
664  let arguments = (ins TransformHandleTypeInterface:$target,
665                       OptionalAttr<StrArrayAttr>:$ops,
666                       OptionalAttr<MatchInterfaceEnum>:$interface,
667                       OptionalAttr<DictionaryAttr>:$op_attrs,
668                       OptionalAttr<TypeAttr>:$filter_result_type,
669                       OptionalAttr<TypeArrayAttr>:$filter_operand_types);
670  // TODO: variadic results when needed.
671  let results = (outs TransformHandleTypeInterface:$results);
672
673  let builders = [
674    OpBuilder<(ins "Value":$target, "ArrayRef<StringRef>":$opNames)>,
675    OpBuilder<(ins "TypeRange":$resultTypes, "Value":$target, "ArrayRef<StringRef>":$opNames)>
676  ];
677
678  let assemblyFormat = [{
679    (`ops` `{` $ops^ `}`)?
680    (`interface` `{` $interface^ `}`)?
681    (`attributes` $op_attrs^)?
682    (`filter_result_type` `=` $filter_result_type^)?
683    (`filter_operand_types` `=` $filter_operand_types^)?
684    `in` $target attr-dict
685    `:` functional-type($target, results)
686  }];
687}
688
689//===----------------------------------------------------------------------===//
690// MultiTileSizesOp
691//===----------------------------------------------------------------------===//
692
693def MultiTileSizesOp : Op<Transform_Dialect, "structured.multitile_sizes",
694    [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
695     TransformOpInterface, TransformEachOpTrait,
696     ReportTrackingListenerFailuresOpTrait]> {
697  let description = [{
698    Emits the IR computing the tile sizes `s1` and `s2` such that:
699
700      - there exists a combination of `n` tiles of size `s1` and `m` tiles of
701        size `s2` that covers the entirety of the iteration space `dimension` of
702        the target structured op;
703      - both `s1` and `s2` are less than or equal to `target_size`;
704      - `s1` and `s2` are divisible by `divisor`.
705
706    For example, for a dimension of size 54 with target size 12 and divisor 2,
707    this can emit the IR computing the tile size 10, used for 3 tiles, and 12,
708    used for 2 tiles, for a total of 10*3 + 12*2 = 54. Note that when the divisor does
709    not divide the original dimension size, it is impossible to compute such
710    tile sizes. An assertion is emitted to guard against this in the dynamic
711    case.
712
713    Expects the target size and the divisor to be strictly positive. Folds the
714    IR as much as possible, normally obtaining constant sizes and numbers of
715    tiles for a statically known dimension.
716
717    This does *not* consume the target handle and produces three handles each
718    pointing to single-result index-typed operations (which may be arithmetic
719    constant operations) defining the two respective tile sizes and the product
720    of the first tile size with the number of tiles of that size (useful for
721    splitting the iteration space).
722
723    This operation composes with the regular tiling when applied per-dimension:
724
725    ```mlir
726    %sz1, %sz2, %split = structured.multitile_sizes %target
727                         { target_size = 10, dimension = 1 }
728                       : !transform.any_op, !transform.param<i64>,
729                         !transform.param<i64>, !transform.param<i64>
730    %handles = structured.split %target after %split { dimension = 1 }
731                : !transform.any_op, !transform.param<i64>
732    %low, %high = transform.split_handle %handles : (!transform.any_op)
733                      -> (!transform.any_op, !transform.any_op)
734    %tiled_low, %loop1 = structured.tile_using_for %low [0, %sz1]
735                       : (!transform.any_op, !transform.param<i64>)
736                      -> (!transform.any_op, !transform.any_op)
737    %tiled_high, %loop2 = structured.tile_using_for %high [0, %sz2]
738                        : (!transform.any_op, !transform.param<i64>)
739                       -> (!transform.any_op, !transform.any_op)
740    %common = merge_handles %tiled_low, %tiled_high : !transform.any_op
741
742    %sz3, %sz4, %split = structured.multitile_sizes %target
743                         { target_size = 42, dimension = 0 }
744                       : !transform.any_op, !transform.any_op,
745                         !transform.any_op, !transform.any_op
746    %sz3r, %sz4r, %splitr = replicate num(%common) %sz3, %sz4, %split
747             : !transform.any_op, !transform.any_op, !transform.any_op
748    structured.split %common after %splitr { dimension = 0 }
749             : !transform.any_op, !transform.any_op
750    // ...
751    ```
752  }];
753
754  let arguments = (ins TransformHandleTypeInterface:$target,
755                       I64Attr:$dimension,
756                       I64Attr:$target_size,
757                       DefaultValuedAttr<I64Attr, "1">:$divisor);
758  let results = (outs TransformAnyParamTypeOrAnyHandle:$low_size,
759                      TransformAnyParamTypeOrAnyHandle:$high_size,
760                      TransformAnyParamTypeOrAnyHandle:$split_point);
761  let hasVerifier = 1;
762  let assemblyFormat =
763    "$target attr-dict `:` custom<MultitileSizesTypes>("
764    "type($target), type($low_size), type($high_size), type($split_point))";
765
766  let extraClassDeclaration = [{
767    ::mlir::DiagnosedSilenceableFailure applyToOne(
768        ::mlir::transform::TransformRewriter &rewriter,
769        ::mlir::linalg::LinalgOp target,
770        ::mlir::transform::ApplyToEachResultList &results,
771        TransformState &state);
772  }];
773}
774
775//===----------------------------------------------------------------------===//
776// PackOp
777//===----------------------------------------------------------------------===//
778
779def PackOp : Op<Transform_Dialect, "structured.pack", [
780                DeclareOpInterfaceMethods<TransformOpInterface>,
781                DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
782                ReportTrackingListenerFailuresOpTrait]> {
783  let description = [{
784    Pack a LinalgOp by applying a data tiling transformation on the op and
785    packing the operands according to the `packed_sizes` specification.
786
787    Iterator dimensions are tiled in their canonical order in the op spec.
788    Operands are packed according to the same canonical order of the op iterator
789    dimensions.
790
791    Specifying a packed size of 0 for an iterator removes it from consideration
792    for packing.
793
794    `tensor.pack` (resp. `tensor.unpack`) operations are inserted for the operands
795    (resp. results) that need to be packed (resp. unpacked) according to the
796    `packed_sizes` specification.
797
798    #### Example
799
800    Consider a `linalg.matmul` with indexing maps:
801    ```
802      //              M   N   K       M   K
803      // affine_map<(d0, d1, d2) -> (d0, d2)>
804      //                              K   N
805      // affine_map<(d0, d1, d2) -> (d2, d1)>
806      //                              M   N
807      // affine_map<(d0, d1, d2) -> (d0, d1)>
808      %0 = linalg.matmul  ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
809                         outs(    %C: tensor<?x?xf32>)
810    ```
811
812    Specifying packed_sizes [2, 3, 4] results in tiling the iterator dimensions
813    M, N and K, in this order, in both the op and its operands.
814    ```
815      //              M   N   K   m   n   k       M   K   m   k
816      // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
817      //                                          K   N   n   k
818      // affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d4, d5)>
819      //                                          M   N   m   n
820      // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
821      %0 = linalg.generic_representing_some_higher_d_matmul
822            ins(%A, %B: tensor<?x?x2x4xf32>, tensor<?x?x4x3xf32>)
823           outs(    %C: tensor<?x?x2x3xf32>)
824    ```
825    In particular, note that the second operand `B` has shape `KxNxnxk` (and not
826    `KxNxkxn` as one could expect by looking **only** at the operand).
827
828    Other layouts can be obtained unsurprisingly from this canonical
829    transformation by composing the resulting operation with a
830    `transform.structured.pack_transpose` op.
831    This composition allows separating concerns and composes better compared
832    to adding additional permutation attributes to this transform op.
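
    For illustration, the packing shown above could be requested as follows
    (the `%matmul` handle is a placeholder):

    ```mlir
    %packed = transform.structured.pack %matmul packed_sizes = [2, 3, 4]
        : (!transform.any_op) -> !transform.any_op
    ```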
833
834    #### Return modes
835
836    This operation applies to a single Linalg op, otherwise it fails.
837    This operation may produce a definite failure if the packing fails for any
838    reason.
839
840    The returned handle points to the packed LinalgOp.
841  }];
842
843  let arguments = (ins TransformHandleTypeInterface:$target,
844                   Variadic<TransformHandleTypeInterface>:$packed_sizes,
845                   DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$static_packed_sizes);
846  let results = (outs TransformHandleTypeInterface:$packed_op);
847  let assemblyFormat = [{
848    $target
849    `packed_sizes` `=` custom<DynamicIndexList>($packed_sizes,
850                                                $static_packed_sizes)
851    attr-dict
852    `:` functional-type(operands, results)
853  }];
854
855  let builders = [
856    OpBuilder<(ins "Value":$target,
857                   "ArrayRef<OpFoldResult>":$mixedPackedSizes)>
858  ];
859
860  let extraClassDeclaration = [{
861    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedPackedSizes();
862  }];
863}
864
865//===----------------------------------------------------------------------===//
866// PackGreedilyOp
867//===----------------------------------------------------------------------===//
868
869def PackGreedilyOp : Op<Transform_Dialect, "structured.pack_greedily", [
870                        DeclareOpInterfaceMethods<TransformOpInterface>,
871                        DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
872                        ReportTrackingListenerFailuresOpTrait]> {
873  let description = [{
874    Target a Linalg op and rewrite it into packed LinalgOp form by trying to
875    infer whether a known suboperation is embedded in it.
876
877    Different packing strategies are applied in order; when one applies
878    successfully, the transform returns:
879      1. Matmul packing: Try to infer a matmul operation embedded in the target op.
880         Specifically, this looks for 2 parallel dimensions that participate in
881         an outer-product and 1 reduction dimension.
882         These dimensions are referred as (m, n, k) to match canonical matmul
883         terminology.
884
885         The packed sizes for (m, n, k) are specified by `matmul_packed_sizes`
886         and the optional `matmul_padded_sizes_next_multiple_of`.
887         When an entry `matmul_packed_sizes[i]` is non-0, the corresponding
888         dimension is packed by `matmul_packed_sizes[i]`.
889         Otherwise, the dimension is merely padded to the next multiple of
890         `matmul_padded_sizes_next_multiple_of[i]`.
891
892         `matmul_padded_sizes_next_multiple_of` is optional and is expected to
893         either be empty or of size `3`, matching the size of `matmul_packed_sizes`.
894         For each individual element of `matmul_packed_sizes` and
895         `matmul_padded_sizes_next_multiple_of`, only one of them is allowed to
896         be non-zero.
897
898         The ordering of the packed dimensions (mm, nn, kk) is specified by the
899         `matmul_inner_dims_order` attribute.
900
901    Packing occurs as follows:
902      1. Find the dimensions to pack according to the strategy.
903      2. The target is converted to linalg.generic form.
904      3. An interchange transform is applied to isolate the dimensions to pack as
905         the most minor indexing dimensions of the linalg.generic. The most minor
906         dimensions are themselves ordered according to `inner_dims_order`.
907      4. An elementwise traversal of `matmul_packed_sizes` and
908         `matmul_padded_sizes_next_multiple_of` is performed and for each
909         dimension `d`, either pack to `matmul_packed_sizes[d]` or pad to the
910         `matmul_padded_sizes_next_multiple_of[d]`.
911      5. Packing/padding is performed by the amounts determined in step 4. and
912         following `inner_dims_order`.
913
914    By normalizing the most minor dimensions to `inner_dims_order`, the transform
915    guarantees that packing immediately generates inner dimensions in a desirable
916    layout.
917
918    Outer dimension layout permutations are not controlled by this transform op
919    at the moment and can be obtained by composing with the pack_transpose
920    transformation.
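
    For illustration, a possible usage sketch (handle names and sizes are
    placeholders):

    ```mlir
    %packed = transform.structured.pack_greedily %generic
        matmul_packed_sizes = [8, 16, 32] matmul_inner_dims_order = [1, 2, 0]
        : (!transform.any_op) -> !transform.any_op
    ```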
921
922    #### Return modes
923
924    This operation ignores non-Linalg ops and drops them in the return.
925    It returns the list of packed Linalg ops or the original op when all available
926    packing strategies failed to apply.
927  }];
928
929  // TODO: Transform_ConcreteOpType<linalg::LinalgOp> needs interface.
930  let arguments = (ins TransformHandleTypeInterface:$target,
931                   Variadic<TransformHandleTypeInterface>:$matmul_packed_sizes,
932                   ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">,
933                                 [DenseArrayCount<3>]>:$static_matmul_packed_sizes,
934                   ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">,
935                                 [Attr<
936                                    Or<[DenseArrayCount<0>.predicate,
937                                        DenseArrayCount<3>.predicate]>,
938                                        "with 0 or 3 elements"
939                                      >]>
940                                 :$matmul_padded_sizes_next_multiple_of,
941                   ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">,
942                                 [DenseArrayCount<3>]>:$matmul_inner_dims_order);
943  let results = (outs TransformHandleTypeInterface:$packed_op);
944
945  let builders = [
946    OpBuilder<(ins "Value":$target,
947                   "ArrayRef<OpFoldResult>":$mixedMatmulPackedSizes,
948                   "ArrayRef<int64_t>":$matmulPaddededSizesNextMultipleOf,
949                   CArg<"ArrayRef<int64_t>", "{}">:$matmulDimsInnerDimsOrder)>
950  ];
951
952  let assemblyFormat = [{
953    $target
954    oilist(
955      `matmul_packed_sizes` `=` custom<DynamicIndexList>($matmul_packed_sizes,
956                                                         $static_matmul_packed_sizes)
957      (`matmul_padded_sizes_next_multiple_of` `=`
958        $matmul_padded_sizes_next_multiple_of^)?
959      `matmul_inner_dims_order` `=` $matmul_inner_dims_order
960    )
961    attr-dict
962    `:` functional-type(operands, results)
963  }];
964  let hasVerifier = 1;
965
966  let extraClassDeclaration = [{
967    /// Returns the list of tile sizes, which may be static (Attribute) or
968    /// dynamic (Value).
969    SmallVector<OpFoldResult> getMixedMatmulPackedSizes();
970  }];
971}
972
973//===----------------------------------------------------------------------===//
974// PackTransposeOp
975//===----------------------------------------------------------------------===//
976
977def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
978                         FunctionalStyleTransformOpTrait,
979                         MemoryEffectsOpInterface,
980                         DeclareOpInterfaceMethods<TransformOpInterface>,
981                         ReportTrackingListenerFailuresOpTrait]> {
982  let description = [{
983    Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and
984    update the `linalg.generic` op that consumes (resp. produces) the operation.
985
986    This transform allows composing a simple `structured.pack` with additional
987    transpositions to e.g. match the data format required by a specific library
988    call or ISA instruction.
989
990    The transpose spec must specify at least one of `outer_perm` or `inner_perm`
991    attributes, which will act upon the `outer_dims_perm` or `inner_dims_pos` of
992    the specified `tensor.pack` or `tensor.unpack` op.
993
994    If the `target` of this op is a `tensor.pack` then a new `tensor.empty` will
995    be created along with transposed versions of the `tensor.pack` and the
996    consuming `linalg.generic`, which is expected to be the sole consumer.
997
998    If the `target` of this op is a `tensor.unpack` then the whole pack / compute
999    / unpack chain will be transposed and transposed clones of `tensor.pack`,
1000    the consuming `linalg.generic` and the tail `tensor.unpack` will be created.
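
    For illustration, a possible usage sketch that transposes the outer
    dimensions of a `tensor.pack` and updates its consuming `linalg.generic`
    (handle names are placeholders):

    ```mlir
    %packed, %pack, %unpack = transform.structured.pack_transpose %pack_op
        with_compute_op(%generic) outer_perm = [1, 0]
        : (!transform.any_op, !transform.any_op)
       -> (!transform.any_op, !transform.any_op, !transform.any_op)
    ```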
1001
1002    #### Return modes
1003
1004    This operation targets a single `tensor.pack` / `tensor.unpack` op and a
1005    single matching `linalg.generic` that consumes / produces the op. Otherwise,
1006    it produces a silenceable failure.
1007
1008    This operation may produce a silenceable failure if the transpose spec is
1009    ill-formed (i.e. `outer_perm` or `inner_perm` are not permutations of the
1010    proper rank) or if the transposition of all involved operations fails for any
1011    reason.
1012
1013    This operation returns 3 handles, one to the transformed LinalgOp, one to
1014    the transformed `tensor.pack` and one to the transformed `tensor.unpack`.
1015    The last handle for `tensor.unpack` is empty if `target_pack_or_un_pack_op`
1016    was not itself a `tensor.unpack`.
1017  }];
1018
1019  let arguments = (ins TransformHandleTypeInterface:$target_pack_or_un_pack_op,
1020                       TransformHandleTypeInterface:$target_linalg_op,
1021                       DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_perm,
1022                       DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$inner_perm);
1023  let results = (outs TransformHandleTypeInterface:$packed_op,
1024                      TransformHandleTypeInterface:$pack_op,
1025                      TransformHandleTypeInterface:$un_pack_op);
1026  let assemblyFormat = [{
1027    $target_pack_or_un_pack_op
1028    `with_compute_op` `(` $target_linalg_op `)`
1029    (`outer_perm` `=` $outer_perm^ )?
1030    (`inner_perm` `=` $inner_perm^ )?
1031    attr-dict
1032    `:` functional-type(operands, results)
1033  }];
1034
1035  let hasVerifier = 1;
1036}
1037
1038//===----------------------------------------------------------------------===//
1039// PadOp
1040//===----------------------------------------------------------------------===//
1041
1042def PadOp : Op<Transform_Dialect, "structured.pad",
1043    [FunctionalStyleTransformOpTrait, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1044     TransformOpInterface,
1045     ReportTrackingListenerFailuresOpTrait]> {
1046  let description = [{
1047    Pads the operations pointed to by the target handle using the options
1048    provided as operation attributes. The operation returns a handle to the
1049    padded operation and to the padding operation ("tensor.pad").
1050
1051    To preserve tensor SSA use-def chains, the unpadded result is copied back to
1052    the original destination tensor of the targeted op. The op that copies back
1053    the result can be customized with `copy_back_op`:
1054
1055    * "bufferization.materialize_in_destination" (default)
1056    * "linalg.copy"
1057    * "none" (no copy back)
1058
1059    #### Return modes
1060
1061    This operation ignores non-Linalg ops and drops them in the return.
1062    This operation may produce a definite failure if the padding fails for any
1063    reason.
1064
1065    If all the operations referred to by the `target` handle pad
1066    properly, the transform succeeds. Otherwise the transform produces a
1067    silenceable failure.
1068    The return handle points to only the subset of successfully produced
1069    padded operations, which can be empty.
1070  }];
1071
1072  let arguments =
1073    (ins TransformHandleTypeInterface:$target,
1074         DefaultValuedAttr<ArrayAttr, "{}">:$padding_values,
1075         DefaultValuedAttr<I64ArrayAttr, "{}">:$padding_dimensions,
1076         Variadic<TransformAnyParamTypeOrAnyHandle>:$pad_to_multiple_of,
1077         DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:
1078                          $static_pad_to_multiple_of,
1079         DefaultValuedAttr<I64ArrayAttr, "{}">:$nofold_flags,
1080         DefaultValuedAttr<
1081          TypedArrayAttrBase<I64ArrayAttr, "array of arrays of i64">,
1082          "{}">:$transpose_paddings,
1083         DefaultValuedAttr<StrAttr, "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copy_back_op);
1084  let results = (outs TransformHandleTypeInterface:$padded,
1085                      TransformHandleTypeInterface:$pad,
1086                      TransformHandleTypeInterface:$copy);
1087
1088  let assemblyFormat = [{
1089    $target
1090    (`pad_to_multiple_of` custom<DynamicIndexList>($pad_to_multiple_of, $static_pad_to_multiple_of)^)?
1091    attr-dict
1092    `:` functional-type(operands, results)
1093  }];
1094
1095  let hasVerifier = 1;
1096
1097  let builders = [
1098    // Builder for a transform::PadOp with automatic inference of padding
1099    // value. Warning: this will set the value 0 for the inferred elemental
1100    // type without taking the op into account and thus only work for the
1101    // add/mul ring at the moment.
1102    // TODO: support other operations (e.g. min, max etc).
1103    OpBuilder<(ins "Value":$target,
1104                   "ArrayRef<int64_t>":$paddingDimensions,
1105                   CArg<"ArrayRef<int64_t>", "{}">:$staticPadToMultipleOf,
1106                   CArg<"ArrayRef<int64_t>", "{}">:$nofoldFlags,
1107                   CArg<"ArrayRef<Attribute>", "{}">:$transposePaddings,
1108                   CArg<"StringRef", "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copyBackOp)>,
1109    OpBuilder<(ins "Value":$target,
1110                   "ArrayRef<int64_t>":$paddingDimensions,
1111                   "ArrayRef<OpFoldResult>":$mixedPadToMultipleOf,
1112                   CArg<"ArrayRef<int64_t>", "{}">:$nofoldFlags,
1113                   CArg<"ArrayRef<Attribute>", "{}">:$transposePaddings,
1114                   CArg<"StringRef", "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copyBackOp)>
1115  ];
1116
1117  let extraClassDeclaration = [{
1118    /// copy_back_op attribute value indicating that no copy back is desired.
1119    static constexpr StringRef kCopyOpNone = "none";
1120
1121    /// Returns a mix of dynamic `pad_to_multiple_of` and static `static_pad_to_multiple_of`.
1122    SmallVector<OpFoldResult> getMixedPadToMultipleOf();
1123
1124    ::mlir::DiagnosedSilenceableFailure apply(
1125      ::mlir::transform::TransformRewriter &rewriter,
1126      ::mlir::transform::TransformResults &results,
1127      ::mlir::transform::TransformState &state);
1128  }];
1129}
1130
1131//===----------------------------------------------------------------------===//
1132// HoistPadOp
1133//===----------------------------------------------------------------------===//
1134
1135def HoistPadBuildPackingLoopNestOp :
1136    Op<Transform_Dialect,
1137       "structured.hoist_pad.build_packing_loop_nest",
1138    [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1139     DeclareOpInterfaceMethods<TransformOpInterface>,
1140     ReportTrackingListenerFailuresOpTrait]> {
1141  let description = [{
1142    Helper transform used to hoist a tensor.pad target operation. This operation
1143    creates the packing loop nest required by the hoist_pad operation and makes
1144    that functionality available independently.
1145
1146    TODO: In the future, we should consider rewriting as a tensor.pack after
1147    hoisting since this abstraction is now available.
1148
1149    #### Return modes
1150
1151    This operation ignores non-tensor.pad ops and drops them in the result.
1152    If any non-tensor.pad is passed, the transform emits a silenceable failure.
1153
1154    The return handle points to only the subset of successfully created packing
1155    loop nests, which can be empty.
1156  }];
1157
1158  // Also allow any payload operation for simpler composition. Non-tensor.pad ops
1159  // will be dropped from the results.
1160  let arguments =
1161    (ins TransformHandleTypeInterface:$target,
1162         TransformHandleTypeInterface:$loop,
1163         DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$transpose);
1164  let results = (outs TransformHandleTypeInterface:$packing_loop);
1165
1166  let assemblyFormat = [{
1167    $target
1168    `above` $loop
1169    (`,` `transpose` `by` $transpose^)?
1170    attr-dict
1171    `:` functional-type(operands, results)
1172  }];
1173  let hasVerifier = 1;
1174}
1175
1176def HoistPadOp : Op<Transform_Dialect, "structured.hoist_pad",
1177    [FunctionalStyleTransformOpTrait,
1178     MemoryEffectsOpInterface,
1179     TransformOpInterface,
1180     TransformEachOpTrait]> {
1181  let description = [{
1182    Hoist the tensor.pad target operation by at most the given number of loops.
1183    Optionally apply the transpose attribute to the inner dimensions.
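
    For illustration, a minimal usage sketch that hoists a `tensor.pad` out of
    one enclosing loop (the `%pad` handle is a placeholder):

    ```mlir
    %hoisted = transform.structured.hoist_pad %pad by 1 loops
        : (!transform.any_op) -> !transform.any_op
    ```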
1184
1185    TODO: In the future, we should consider rewriting as a tensor.pack after
1186    hoisting since this abstraction is now available.
1187    TODO: Maybe also return the linalg.generic transpose created at some point.
1188
1189    #### Return modes
1190
1191    This operation ignores non-tensor.pad ops and drops them in the result.
1192    If any non-tensor.pad is passed, the transform emits a silenceable failure.
1193
1194    If all the operations referred to by the `target` handle pad properly, the
1195    transform succeeds. Otherwise the transform produces a silenceable failure.
1196
1197    The return handle points to only the subset of successfully hoisted
1198    tensor.pad operations, which can be empty.
1199  }];
1200
1201  // Also allow any operation for simpler composition. Non-tensor.pad ops
1202  // will be dropped from the results.
1203  let arguments =
1204    (ins TransformHandleTypeInterface:$target,
1205         I64Attr:$num_loops,
1206         DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$transpose);
1207  let results = (outs TransformHandleTypeInterface:$transformed);
1208
1209  let assemblyFormat = [{
1210    $target
1211    `by` $num_loops `loops`
1212    (`,` `transpose` `by` $transpose^)?
1213    attr-dict
1214    `:` functional-type(operands, results)
1215  }];
1216  let hasVerifier = 1;
1217
1218  let extraClassDeclaration = [{
1219    ::mlir::DiagnosedSilenceableFailure applyToOne(
1220        ::mlir::transform::TransformRewriter &rewriter,
1221        ::mlir::tensor::PadOp,
1222        ::mlir::transform::ApplyToEachResultList &results,
1223        ::mlir::transform::TransformState &state);
1224  }];
1225}
1226
1227//===----------------------------------------------------------------------===//
1228// PromoteOp
1229//===----------------------------------------------------------------------===//
1230
1231
1232def PromoteOp : Op<Transform_Dialect, "structured.promote",
1233    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
1234    TransformOpInterface, TransformEachOpTrait,
1235    ReportTrackingListenerFailuresOpTrait]> {
1236  let description = [{
1237    Promotes the specified operands of the target into a separate memory buffer.
1238
1239    At this point, this transform does not allow customizing alloc/dealloc
1240    functions nor the behavior on copy in/out operations.
1241
1242    #### Return modes
1243
1244    This operation applies to a single Linalg op that satisfies the
1245    `promoteSubviewsPrecondition`, otherwise it fails.
1246
1247    If the operations referred to by the `target` handle promote
1248    properly, the transform succeeds.
1249
1250    When successful, the return handle points to the $target operation that
    was modified in place.
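
    A sketch of typical usage (the `%matmul` handle and the attribute values
    are illustrative; the trailing type annotation is printed by a custom
    parser and follows the functional-type style):

    ```
    %promoted = transform.structured.promote %matmul
        {operands_to_promote = [0, 1], use_full_tiles_by_default}
        : (!transform.any_op) -> !transform.any_op
    ```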
1252  }];
1253
1254  let arguments = (ins TransformHandleTypeInterface:$target,
1255                       DefaultValuedAttr<I64ArrayAttr, "{}">:$operands_to_promote,
1256                       DefaultValuedAttr<BoolArrayAttr, "{}">:$use_full_tile_buffers,
1257                       UnitAttr:$use_full_tiles_by_default,
1258                       UnitAttr:$use_alloca,
1259                       OptionalAttr<AnyAttr>:$memory_space,
1260                       OptionalAttr<DeviceMappingArrayAttr>:$mapping,
1261                       OptionalAttr<I64Attr>:$alignment);
1262  let results = (outs TransformHandleTypeInterface:$transformed);
1263
1264  let assemblyFormat = [{
1265    $target attr-dict `:`
1266    custom<SemiFunctionType>(type($target), type($transformed), "false")
1267  }];
1268
1269  let extraClassDeclaration = [{
1270    ::mlir::DiagnosedSilenceableFailure applyToOne(
1271        ::mlir::transform::TransformRewriter &rewriter,
1272        ::mlir::linalg::LinalgOp target,
1273        ::mlir::transform::ApplyToEachResultList &results,
1274        ::mlir::transform::TransformState &state);
1275  }];
1276}
1277
1278//===----------------------------------------------------------------------===//
1279// ReplaceOp
1280//===----------------------------------------------------------------------===//
1281
1282def ReplaceOp : Op<Transform_Dialect, "structured.replace",
1283    [IsolatedFromAbove, DeclareOpInterfaceMethods<TransformOpInterface>,
1284     DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1285     ReportTrackingListenerFailuresOpTrait] # GraphRegionNoTerminator.traits> {
1286  let description = [{
1287    Replace all `target` payload ops with the single op that is contained in
1288    this op's region. All targets must have zero arguments and must be isolated
1289    from above.
1290
1291    This op is for debugging/experiments only.
1292
1293    #### Return modes
1294
1295    This operation consumes the `target` handle.
1296  }];
1297
1298  let arguments = (ins TransformHandleTypeInterface:$target);
1299  let results = (outs TransformHandleTypeInterface:$replacement);
1300  let regions = (region SizedRegion<1>:$bodyRegion);
1301  let assemblyFormat = [{
1302      $target attr-dict-with-keyword regions `:`
1303      custom<SemiFunctionType>(type($target), type($replacement), "false")
1304  }];
1305  let hasVerifier = 1;
1306}
1307
1308//===----------------------------------------------------------------------===//
1309// ScalarizeOp
1310//===----------------------------------------------------------------------===//
1311
1312def ScalarizeOp : Op<Transform_Dialect, "structured.scalarize",
1313    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
1314     TransformOpInterface, TransformEachOpTrait,
1315     ReportTrackingListenerFailuresOpTrait]> {
1316  let description = [{
1317    Indicates that ops of a specific kind in the given function should be
1318    scalarized (i.e. their dynamic dimensions tiled by 1).
1319
1320    #### Return modes:
1321
1322    This operation ignores non-Linalg ops and drops them in the return.
    This operation produces a definite failure if the scalarization fails for any
1324    reason.
1325    If all the operations referred to by the `target` handle scalarize
1326    properly, the transform succeeds. Otherwise the transform produces a
1327    silenceable failure.
1328
1329    The return handle points to only the subset of successfully produced
1330    tiled-by-1 operations, which can be empty.
1331
1332    This operation does not return handles to the tiled loop.
1333    We make this design choice because it is hard to know ahead of time the
1334    number of loops that will be produced (it depends on the number of dynamic
1335    dimensions after multiple transformations have been applied).
1336    Loops can always be recovered by navigating from the tiled operations if
1337    needed.
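
    A minimal usage sketch (the `%matmul` handle is illustrative; the trailing
    type annotation is printed by a custom parser and follows the
    functional-type style):

    ```
    %scalarized = transform.structured.scalarize %matmul
        : (!transform.any_op) -> !transform.any_op
    ```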
1338  }];
1339
1340  let arguments = (ins TransformHandleTypeInterface:$target);
1341  let results = (outs TransformHandleTypeInterface:$result);
1342
1343  let assemblyFormat = [{
1344    $target attr-dict `:`
1345    custom<SemiFunctionType>(type($target), type($result), "false")
1346  }];
1347
1348  let extraClassDeclaration = [{
1349    ::mlir::DiagnosedSilenceableFailure applyToOne(
1350        ::mlir::transform::TransformRewriter &rewriter,
1351        ::mlir::linalg::LinalgOp target,
1352        ::mlir::transform::ApplyToEachResultList &results,
1353        ::mlir::transform::TransformState &state);
1354  }];
1355}
1356
1357//===----------------------------------------------------------------------===//
1358// ConvertToLoopsOp
1359//===----------------------------------------------------------------------===//
1360
1361def ConvertToLoopsOp : Op<Transform_Dialect, "structured.convert_to_loops",
1362    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
1363     DeclareOpInterfaceMethods<TransformOpInterface>,
1364     ReportTrackingListenerFailuresOpTrait]> {
1365  let description = [{
    For operations that implement the `TilingInterface` and provide
    the `generateScalarImplementation` method, lowers the operation to
1368    loops. The return handle points to all generated loops.
1369    Fails if the payload ops cannot be lowered to loops.
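
    A minimal usage sketch (the `%tileable` handle is illustrative):

    ```
    %loops = transform.structured.convert_to_loops %tileable
        : (!transform.any_op) -> !transform.any_op
    ```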
1370  }];
1371
1372  let arguments = (ins TransformHandleTypeInterface:$target);
1373  let results = (outs TransformHandleTypeInterface:$result);
1374
1375  let assemblyFormat = [{
1376    $target attr-dict `:` functional-type(operands, results)
1377  }];
1378}
1379
1380//===----------------------------------------------------------------------===//
1381// DecomposeInterfaceOp
1382//===----------------------------------------------------------------------===//
1383
1384def DecomposeInterfaceOp : Op<Transform_Dialect, "structured.decompose_interface",
1385    [FunctionalStyleTransformOpTrait,
1386     MemoryEffectsOpInterface,
1387     TransformOpInterface,
1388     TransformEachOpTrait,
1389     ReportTrackingListenerFailuresOpTrait]> {
1390  let description = [{
1391    TODO
1392  }];
1393
1394  let arguments = (ins TransformHandleTypeInterface:$target);
1395  let results = (outs TransformHandleTypeInterface:$transformed);
1396  let assemblyFormat =
1397      "$target attr-dict `:` functional-type(operands, results)";
1398
1399  let extraClassDeclaration = [{
1400    ::mlir::DiagnosedSilenceableFailure applyToOne(
1401        ::mlir::transform::TransformRewriter &rewriter,
1402        ::mlir::Operation *target,
1403        ::mlir::transform::ApplyToEachResultList &results,
1404        ::mlir::transform::TransformState &state);
1405  }];
1406}
1407//===----------------------------------------------------------------------===//
1408// RewriteInDestinationPassingStyleOp.
1409//===----------------------------------------------------------------------===//
1410
1411def RewriteInDestinationPassingStyleOp : Op<
1412    Transform_Dialect, "structured.rewrite_in_destination_passing_style",
1413    [FunctionalStyleTransformOpTrait,
1414     MemoryEffectsOpInterface,
1415     TransformOpInterface,
1416     TransformEachOpTrait,
1417     ReportTrackingListenerFailuresOpTrait]> {
1418  let description = [{
1419    Rewrite a supported tensor operation that is not in destination-passing style
1420    into a form that is in destination-passing style.
1421    Currently supported operations are:
1422      - tensor.pad
1423      - tensor.generate
1424      - tensor.from_elements
    This dichotomy hints at a future interface; for now, the implementation just
    switches between different implementations.
1427
1428    #### Return modes
1429
    This operation ignores unsupported ops and drops them from the return.
    If all the operations referred to by the `target` handle are rewritten
    properly, the transform succeeds. Otherwise the transform produces a
    silenceable failure.
    The return handle points to a subset of successfully produced operations:
      - In the `tensor.pad` case, the returned handle points to the `tensor.insert_slice`.
      - In the `tensor.generate` case, the returned handle points to the `linalg.generic`.
      - In the `tensor.from_elements` case, the returned handle points to the last
        `tensor.insert`.
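
    For example (illustrative; the `%pad` handle is assumed to point to
    `tensor.pad` ops):

    ```
    %dps = transform.structured.rewrite_in_destination_passing_style %pad
        : (!transform.any_op) -> !transform.any_op
    ```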
1439  }];
1440
1441  let arguments = (ins TransformHandleTypeInterface:$target);
1442  let results = (outs TransformHandleTypeInterface:$transformed);
1443  let assemblyFormat = [{
1444    $target attr-dict
1445    `:` functional-type($target, results)
1446  }];
1447
1448  let extraClassDeclaration = [{
1449    ::mlir::DiagnosedSilenceableFailure applyToOne(
1450        ::mlir::transform::TransformRewriter &rewriter,
1451        ::mlir::Operation *target,
1452        ::mlir::transform::ApplyToEachResultList &results,
1453        ::mlir::transform::TransformState &state);
1454  }];
1455}
1456
1457//===----------------------------------------------------------------------===//
1458// SplitOp
1459//===----------------------------------------------------------------------===//
1460
1461def SplitOp : Op<Transform_Dialect, "structured.split",
1462    [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1463     DeclareOpInterfaceMethods<TransformOpInterface>,
1464     ReportTrackingListenerFailuresOpTrait]> {
1465  let description = [{
1466    Splits the given `target` op into two or more complementary
1467    parts, which combined cover the entire iteration domain of the original op.
    The split is performed along the iteration space dimension given by the
    `dimension` attribute; the chunk size specifies the size of the lower part,
    and the remaining range of the iteration space is assigned to the upper
    part. In case of dimension overflow, the transformation fails. The chunk
    size is specified either as the static chunk size attribute, when it is
    known at transform IR construction time, or as a handle to an operation
    producing a single index-typed value, when it is computed by the payload
    IR. In the latter case, the static chunk size must be set to
    `ShapedType::kDynamic` and the dynamic size handle must point to as many
    value-producing operations as there are structured operations pointed to
    by the target handle.
1479
1480    The operation consumes the target handle, but preserves the chunk size
1481    handle if provided. Without the `multiway` attribute, it produces a
1482    new handle that is a list of the two parts of the structured op after
    splitting, with the lower index part corresponding to the part with lower
    iteration space indices.
1485
1486    Multiway split mode is enabled by specifying the `multiway` attribute.
1487    In this mode a single `target` op is split into multiple parts covering
    the iteration space of the specified dimension. `static_chunk_sizes` and
    `dynamic_chunk_sizes` in this case are a list of chunk sizes that the given
    dimension should be split into. With `multiway` it also produces a handle;
    the result handle is a list of the multiple parts of the structured op
    after splitting, where the target dimension of each linalg op in the
    list corresponds to the chunk size specified in the input split list.
    If the chunk sizes do not cover the entire iteration space, the leftover
    chunk is the last payload in the result handle.

    As the result handle is most of the time a list, a `transform.split_handle`
    is needed to access the individual handles.
1499  }];
1500
1501  let arguments = (ins TransformHandleTypeInterface:$target,
1502                       I64Attr:$dimension,
1503                       Optional<TransformAnyParamTypeOrAnyHandle>:$dynamic_chunk_sizes,
1504                       I64Attr:$static_chunk_sizes,
1505                       UnitAttr:$multiway);
1506  let results = (outs TransformHandleTypeInterface:$split_list);
1507  let hasCustomAssemblyFormat = 1;
1508  let hasVerifier = 1;
1509}
1510
1511//===----------------------------------------------------------------------===//
1512// SplitReductionOp
1513//===----------------------------------------------------------------------===//
1514
1515def SplitReductionOp : Op<Transform_Dialect, "structured.split_reduction",
1516       [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
1517        TransformEachOpTrait, TransformOpInterface,
1518        ReportTrackingListenerFailuresOpTrait]> {
1519  let description = [{
1520    Indicates that the given `target` op should be transformed with the
1521    `splitReduction` transformation and split factor provided as attribute.
1522
    The `splitReduction` transformation splits the first reduction dimension of
    a single linalg op into a parallel and a reduction dimension.
1525    A new `linalg.generic` op is created to perform the rest of the reduction.
1526
1527    The transformation supports different configurations attributes:
1528      - split_factor: the factor by which to split (i.e. the size of the
1529        remaining reduction after splitting).
1530      - insert_split_dimension: the dimension in the temporary tensor into
1531        which the new parallel dimension is inserted.
1532      - inner_parallel: specifies whether the parallel dimension is before or
1533        after the reduction dimension in the splitting op.
1534      - use_scaling_algorithm: whether to use a scaling based formulation that
1535        does not create an ExpandShapeOp (default: do not use scaling)
1536      - use_alloc: whether to use an alloc op to allocate the temporary
1537        tensor (default: do not use alloc op)
1538
1539    #### Return modes
1540
1541    This operation ignores non-Linalg ops and drops them in the return.
1542    This operation produces a definite failure if the splitting fails for any
1543    reason.
1544
1545    If all the operations referred to by the `target` handle split
1546    properly, the transform succeeds. Otherwise the transform produces a
    silenceable failure. The 4 returned handles point to only the subset of
    successfully produced computational operations, which can all be empty.
    These 4 returned handles point to:
1550      - the init op (or tensor_alloc op if use_alloc = true),
1551      - the fill op used to initialize the neutral element,
1552      - the split op and
1553      - the result-combining op.
1554
1555    #### Example (default: `use_scaling_algorithm = false, use_alloc = false`):
1556
1557    ```
1558      %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
1559                                            affine_map<(d0) -> ()>],
1560            iterator_types = ["reduction"]}
1561      ins(%in : tensor<32xf32>)
1562      outs(%out : tensor<f32>) {
1563      ^bb0(%arg1: f32, %arg2: f32):
1564        %y = arith.addf %arg1, %arg2 : f32
1565        linalg.yield %y : f32
1566      } -> tensor<f32>
1567    ```
1568
1569    is split into:
1570
1571    ```
1572      %cst = arith.constant 0.000000e+00 : f32
1573      %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
1574      %1 = tensor.empty() : tensor<4xf32>
1575      %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
1576      %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
1577                                            affine_map<(d0, d1) -> (d0)>],
1578        iterator_types = ["parallel", "reduction"]}
1579        ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {
        ^bb0(%arg3: f32, %arg4: f32):
1581        %5 = arith.addf %arg3, %arg4 : f32
1582        linalg.yield %5 : f32
1583      } -> tensor<4xf32>
1584      %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
1585                                            affine_map<(d0) -> ()>],
1586        iterator_types = ["reduction"]}
1587        ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) {
1588        ^bb0(%arg3: f32, %arg4: f32):
1589        %5 = arith.addf %arg3, %arg4 : f32
1590        linalg.yield %5 : f32
1591      } -> tensor<f32>
1592    ```
1593
1594    #### Example (`use_scaling_algorithm = true, use_alloc = true`):
1595
1596    Instead of introducing an ExpandShapeOp, this scaling-based implementation
1597    rewrites a reduction dimension `k` into `k * split_factor + kk`.
1598    The dimension `kk` is added as an extra parallel dimension to the
1599    intermediate output tensor at position `insert_split_dimension`.
1600
1601    Consider a minimal example where `k` is reduced:
1602        O(i, j) += I(i, j, k)
1603    Assume i=3, j=5, k=128, split_factor=16 and insert_split_dimension=0.
1604    The compute is rewritten as:
1605      a. O_i(kk, i, j) += I(i, j, 16 * k + kk)
1606      b. O(i, j) += O_i(kk, i, j)
1607    The intermediate tensor O_i is of shape (128/16)x3x5 == 8x3x5.
1608
1609    #### Example:
1610
1611    ```
1612     %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
1613       outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
1614    ```
1615
1616    Is transformed to:
1617
1618    ```
1619     #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2 * 4 + d3)>
1620     #map1 = affine_map<(d0, d1, d2, d3) -> (d2 * 4 + d3, d1)>
1621     #map2 = affine_map<(d0, d1, d2, d3) -> (d2, d3)>
1622     #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
1623     #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
1624     #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
1625     %0 = tensor.empty() : tensor<16x32x64xf32>
1626     %cst = arith.constant 0.000000e+00 : f32
1627     %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
1628        tensor<16x32x64xf32>
1629     %2 = tensor.empty() : tensor<64x4xi1>
1630
1631     %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
1632       iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
1633       ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
1634       outs(%1 : tensor<16x32x64xf32>) {
1635         ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
1636           %5 = arith.mulf %arg3, %arg4 : f32
1637           %6 = arith.addf %arg6, %5 : f32
1638           linalg.yield %6 : f32
1639     } -> tensor<16x32x64xf32>
1640
1641     %4 = linalg.generic {indexing_maps = [#map4, #map5],
1642       iterator_types = ["parallel", "parallel", "reduction"]}
1643       ins(%3 : tensor<16x32x64xf32>)
1644       outs(%C : tensor<16x32xf32>) {
1645         ^bb0(%arg3: f32, %arg4: f32):
1646           %5 = arith.addf %arg3, %arg4 : f32
1647           linalg.yield %5 : f32
1648     } -> tensor<16x32xf32>
1649
1650     return %4 : tensor<16x32xf32>
1651    ```
1652  }];
1653
1654  let arguments = (ins TransformHandleTypeInterface:$target,
1655                   DefaultValuedAttr<I64Attr, "{}">:$split_factor,
1656                   DefaultValuedAttr<I64Attr, "{}">:$insert_split_dimension,
1657                   UnitAttr:$inner_parallel,
1658                   UnitAttr:$use_scaling_algorithm,
1659                   UnitAttr:$use_alloc);
1660  let results = (outs TransformHandleTypeInterface:$init_or_alloc_op,
1661                      TransformHandleTypeInterface:$fill_op,
1662                      TransformHandleTypeInterface:$split_linalg_op,
1663                      TransformHandleTypeInterface:$combining_linalg_op);
1664
1665  let assemblyFormat =
1666      "$target attr-dict `:`"
1667      "functional-type(operands, results)";
1668
1669  let builders = [
1670    OpBuilder<(ins "Value":$target,
1671                   "int64_t":$splitFactor,
1672                   "int64_t":$insertSplitDimension,
1673                   CArg<"bool", "false">:$innerParallel,
1674                   CArg<"bool", "false">:$useScalingAlgorithm,
1675                   CArg<"bool", "false">:$useAlloc)>
1676  ];
1677
1678  let extraClassDeclaration = [{
1679    ::mlir::DiagnosedSilenceableFailure applyToOne(
1680        ::mlir::transform::TransformRewriter &rewriter,
1681        ::mlir::linalg::LinalgOp target,
1682        ::mlir::transform::ApplyToEachResultList &results,
1683        ::mlir::transform::TransformState &state);
1684  }];
1685}
1686
1687//===----------------------------------------------------------------------===//
1688// TileReductionUsingForOp
1689//===----------------------------------------------------------------------===//
1690
1691def TileReductionUsingForOp : Op<Transform_Dialect, "structured.tile_reduction_using_for",
1692       [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
1693        TransformEachOpTrait, TransformOpInterface,
1694        ReportTrackingListenerFailuresOpTrait]> {
1695  let description = [{
1696    Indicates that the given `target` op should be transformed with the
1697    `tileReduction` transformation with the tile size provided as attribute.
1698
1699    This transformation tiles the `target` along the reduction dimensions. It
1700    creates a tensor initialized with the identity value. Then it creates nested
    loops with a parallel version of the `target` op inside. The parallel op
    dimensions are less than or equal to the tile size passed by the user.
    After the loop, a merge operation is created to do a final reduction with
    the partial reductions.
1705    The initial tensor always uses the tile size dimension. This may overallocate
1706    if the tile size is greater than the reduction dimension.
1707
1708    #### Return modes
1709
1710    Returns 4 handles associated with (in order):
      - the fill op used to initialize the neutral element,
      - the parallel tiled op,
      - the result-combining op, and
      - the parent `for` op.
1715
1716    #### Example:
1717
1718    ```
1719      %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
1720                                              affine_map<(d0, d1) -> (d0)>],
1721      iterator_types = ["parallel", "reduction"]}
1722      ins(%arg0 : tensor<?x?xf32>)
1723      outs(%out : tensor<?xf32>) {
1724        ^bb0(%arg7: f32, %arg9: f32):
1725        %1 = arith.addf %arg7, %arg9 : f32
1726        linalg.yield %1 : f32
1727      } -> tensor<?xf32>
1728      return %red : tensor<?xf32>
1729    ```
1730
1731    is transformed into:
1732
1733    ```
1734      %0 = tensor.empty(%dim_1) : tensor<?x5xf32>
1735      %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
1736      %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor<?x5xf32>) {
1737        %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor<?x5xf32> to tensor<?x5xf32>
1738        %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor<?x?xf32> to tensor<?x5xf32>
1739        %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
1740                                              affine_map<(d0, d1) -> (d0, d1)>],
1741        iterator_types = ["parallel", "parallel"]}
1742        ins(%extracted_slice_2 : tensor<?x5xf32>)
1743        outs(%extracted_slice : tensor<?x5xf32>) {
1744        ^bb0(%in: f32, %out: f32):
1745          %5 = arith.addf %in, %out : f32
1746          linalg.yield %5 : f32
1747        } -> tensor<?x5xf32>
1748        %dim_3 = tensor.dim %1, %c0 : tensor<?x5xf32>
1749        %inserted_slice = tensor.insert_slice %4 into %arg3[0, 0] [%dim_3, 5] [1, 1] : tensor<?x5xf32> into tensor<?x5xf32>
1750        scf.yield %inserted_slice : tensor<?x5xf32>
1751      }
1752      %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
1753                                            affine_map<(d0, d1) -> (d0)>],
1754      iterator_types = ["parallel", "reduction"]}
1755      ins(%2 : tensor<?x5xf32>)
1756      outs(%arg1 : tensor<?xf32>) {
1757      ^bb0(%in: f32, %out: f32):
1758        %4 = arith.addf %in, %out : f32
1759        linalg.yield %4 : f32
1760      } -> tensor<?xf32>
1761    ```
1762  }];
1763
1764  // TODO: support mixed static-dynamic (see TileUsingForallOp).
1765  let arguments = (ins TransformHandleTypeInterface:$target,
1766                   DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$tile_sizes);
1767  let results = (outs Variadic<TransformHandleTypeInterface>:$fill_op,
1768                      TransformHandleTypeInterface:$split_op,
1769                      TransformHandleTypeInterface:$combining_op,
1770                      TransformHandleTypeInterface:$for_op);
1771
1772  let builders = [
1773    OpBuilder<(ins "Value":$target,
1774                   "ArrayRef<int64_t>":$staticTileSizes)>
1775  ];
1776
1777  let assemblyFormat = [{
1778    $target
1779    `by` `tile_sizes` `=` $tile_sizes
1780    attr-dict
1781    `:` functional-type(operands, results)
1782  }];
1783
1784  let extraClassDeclaration = [{
1785    ::mlir::DiagnosedSilenceableFailure applyToOne(
1786        ::mlir::transform::TransformRewriter &rewriter,
1787        Operation *target,
1788        ::mlir::transform::ApplyToEachResultList &results,
1789        ::mlir::transform::TransformState &state);
1790  }];
1791}
1792
1793//===----------------------------------------------------------------------===//
1794// TileReductionUsingForallOp
1795//===----------------------------------------------------------------------===//
1796
1797def TileReductionUsingForallOp :
1798  Op<Transform_Dialect, "structured.tile_reduction_using_forall",
1799       [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
1800        TransformEachOpTrait, TransformOpInterface,
1801        ReportTrackingListenerFailuresOpTrait]> {
1802  let description = [{
1803    Tile a PartialReductionOpInterface op to a tiled `scf.forall` doing
1804    partial reduction.
1805
1806    This transformation tiles the `target` along the reduction dimensions. It
    creates a tensor initialized with the identity value. Then it creates an
    `scf.forall` loop with the number of threads given by `num_threads`.
    The op is tiled with a size equal to `floordiv(size, num_threads)`.
    All the partial reduction values are inserted in parallel to create a new
    tensor. After the loop, a merge operation is created to do a final reduction
    with the partial reductions tensor.
    If an extra `tile_sizes` parameter is passed, the tiles are cyclically
    distributed on the threads of the `scf.forall` loop.
1815
1816    #### Return modes
1817
1818    Returns 4 handles associated with (in order):
      - the fill op used to initialize the neutral element,
      - the parallel tiled op,
      - the result-combining op, and
      - the parent `forall` op.
1823
1824    #### Example:
1825
1826    ```
1827      %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
1828                                              affine_map<(d0, d1) -> (d0)>],
1829      iterator_types = ["parallel", "reduction"]}
1830      ins(%arg0 : tensor<?x?xf32>)
1831      outs(%out : tensor<?xf32>) {
1832        ^bb0(%arg7: f32, %arg9: f32):
1833        %1 = arith.addf %arg7, %arg9 : f32
1834        linalg.yield %1 : f32
1835      } -> tensor<?xf32>
1836      return %red : tensor<?xf32>
1837    ```
1838
1839    is transformed into:
1840
1841    ```
1842      %0 = tensor.empty(%dim_1) : tensor<?x5xf32>
1843      %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
1844      %2 = scf.forall (%arg2) in (%c5) shared_outs(%arg3 = %1) -> (tensor<?x5xf32>) {
1845        %4 = affine.min #map(%arg2)[%dim_0]
1846        %5 = affine.max #map1(%4)
1847        %extracted_slice = tensor.extract_slice %arg3[0, %arg2] [%dim, 1] [1, 1] : tensor<?x5xf32> to tensor<?xf32>
1848        %6 = affine.apply #map2(%arg2)[%dim_0]
1849        %extracted_slice_2 = tensor.extract_slice %arg0[0, %6] [%dim, %5] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
1850        %extracted_slice_3 = tensor.extract_slice %extracted_slice[0] [%dim] [1] : tensor<?xf32> to tensor<?xf32>
1851        %7 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%extracted_slice_2 : tensor<?x?xf32>) outs(%extracted_slice_3 : tensor<?xf32>) {
1852        ^bb0(%in: f32, %out: f32):
1853          %9 = arith.addf %in, %out : f32
1854          linalg.yield %9 : f32
1855        } -> tensor<?xf32>
1856        scf.forall.in_parallel {
1857          tensor.parallel_insert_slice %7 into %arg3[0, %arg2] [%dim, 1] [1, 1] : tensor<?xf32> into tensor<?x5xf32>
1858        }
1859      } {mapping = []}
1860      %3 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor<?x5xf32>) outs(%arg1 : tensor<?xf32>) {
1861      ^bb0(%in: f32, %out: f32):
1862        %4 = arith.addf %in, %out : f32
1863        linalg.yield %4 : f32
1864      } -> tensor<?xf32>
1865    ```
1866  }];
1867
1868  // TODO: support mixed static-dynamic (see TileUsingForallOp).
1869  let arguments = (ins TransformHandleTypeInterface:$target,
1870                   DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$num_threads,
1871                   DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$tile_sizes,
1872                   OptionalAttr<DeviceMappingArrayAttr>:$mapping);
1873  let results = (outs Variadic<TransformHandleTypeInterface>:$fill_op,
1874                      TransformHandleTypeInterface:$split_linalg_op,
1875                      TransformHandleTypeInterface:$combining_linalg_op,
1876                      TransformHandleTypeInterface:$forall_op);
1877
1878  let builders = [
1879    OpBuilder<(ins "Value":$target,
1880                   "ArrayRef<int64_t>":$staticNumThreads,
1881                   "ArrayRef<int64_t>":$staticTileSizes,
1882                   CArg<"ArrayAttr", "{}">:$mapping)>
1883  ];
1884
1885  let assemblyFormat = [{
1886    $target
1887    `by`
1888    (`num_threads` `=` $num_threads^)?
1889    (`,` `tile_sizes` `=` $tile_sizes^)?
1890    (`,` `mapping` `=` $mapping^)?
1891    attr-dict
1892    `:` functional-type(operands, results)
1893  }];
1894
1895  let extraClassDeclaration = [{
1896    ::mlir::DiagnosedSilenceableFailure applyToOne(
1897        ::mlir::transform::TransformRewriter &rewriter,
1898        ::mlir::linalg::LinalgOp target,
1899        ::mlir::transform::ApplyToEachResultList &results,
1900        ::mlir::transform::TransformState &state);
1901  }];
1902
1903}
1904
1905//===----------------------------------------------------------------------===//
1906// ContinuousTileSizesOp
1907//===----------------------------------------------------------------------===//
1908
1909def ContinuousTileSizesOp : Op<Transform_Dialect, "structured.continuous_tile_sizes",
1910       [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1911        DeclareOpInterfaceMethods<TransformOpInterface>,
1912        ReportTrackingListenerFailuresOpTrait]> {
1913  let description = [{
1914    This transform emits the IR computing the list of (1) exponentially
1915    diminishing tile sizes that are powers of 2; and (2) the corresponding
1916    chunk-sizes the target op should be split into along the given dimension.
1917
    For example, for `target_size` 9 and `dimension` 0, with the following
    linalg op as target
1920
1921    ```
1922      %0 = linalg.matmul  ins(%arg0, %arg1: tensor<25x34xf32>, tensor<34x25xf32>)
1923                      outs(%arg2: tensor<25x25xf32>)
1924    ```
1925
1926    the first result `tile_sizes` will be a list of diminishing tile sizes
1927    9, 4, 2, 1; and the second result will be a list of chunk sizes
1928    18, 4, 2, 1 that the corresponding dimension should be split into.
1929
1930    After the target op has been split along the given dimension (for example
1931    using multiway split), each chunk can be tiled with the corresponding tile
1932    size in the `tile_sizes` list generated as a result of this op.
1933
    Specifying the output type as `!transform.param<i64>` will cause `tile_sizes`
    and `chunk_sizes` to be computed statically and not dynamically.
1936  }];
1937
1938  let arguments = (ins TransformHandleTypeInterface:$target,
1939                       ConfinedAttr<I64Attr, [IntNonNegative]>:$dimension,
1940                       ConfinedAttr<I64Attr, [IntNonNegative]>:$target_size);
1941  let results = (outs TransformAnyParamTypeOrAnyHandle:$tile_sizes,
1942                      TransformAnyParamTypeOrAnyHandle:$chunk_sizes);
1943  let hasVerifier = 1;
1944  let assemblyFormat =
1945    "$target attr-dict `:` custom<ContinuousTileSizeTypes>("
1946    "type($target), type($tile_sizes), type($chunk_sizes))";
1947
1948}
1949
1950//===----------------------------------------------------------------------===//
1951// TileUsingForOp
1952//===----------------------------------------------------------------------===//
1953
1954def TileUsingForOp : Op<Transform_Dialect, "structured.tile_using_for",
1955       [DeclareOpInterfaceMethods<TransformOpInterface>,
1956        DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1957        ReportTrackingListenerFailuresOpTrait]> {
1958  let description = [{
1959    Indicates that the given `target` op should be tiled with the given sizes.
1960    This transform generates a loop nest with a smaller ("tiled") target
1961    operation in its body. Currently limited to LinalgOps.
1962
1963    Tile sizes may be known at transformation time, in which case they are
    expected to be provided in the `static_sizes` attribute, or not, in which
1965    case the tile value must be computed by the payload IR and the handle to the
1966    operation computing it must be provided through `dynamic_sizes`. When the
1967    sizes are not known statically, the corresponding entry in the
1968    `static_sizes` attribute must be set to `ShapedType::kDynamic`. Only
1969    the dynamic sizes must be provided in `dynamic_sizes`, i.e., there should
1970    be as many handles as `ShapedType::kDynamic` values in the
1971    `static_sizes` attribute. A static size of `0` indicates that the dimension
1972    should not be tiled. No loop will be generated for such dimensions. If all
1973    tile sizes are `0`, this transform is effectively a no-op.
1974
1975    This op returns handles to the tiled op (in the generated loop nest) and the
1976    generated loops. The number of loops is the number of tile sizes that are
1977    statically known to be non-zero.
1978
1979    #### Return modes
1980
1981    On success, the resulting handles are associated with co-indexed lists of
1982    tiled operations and loops around them.
1983
1984    This operation only supports Linalg ops and produces a silenceable failure
    if the input contains any non-Linalg ops. The ops preceding the offending op
    in the list associated with the `target` handle will have been tiled.
1987
1988    This operation produces a silenceable failure if the `dynamic_sizes` handles
1989    are associated with lists of payload operations of a size different than
1990    that of the list associated with the `target` handle.
1991
1992    If the internal implementation of tiling for any of the operations fails,
1993    produces a definite failure.
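
    For example (handle names and sizes are illustrative), tiling a matmul by
    `[4, 4, 0]` returns one handle for the tiled op and one per generated loop:

    ```
    %tiled, %loop0, %loop1 = transform.structured.tile_using_for %matmul
        tile_sizes [4, 4, 0]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    ```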
1994  }];
1995
1996  let arguments = (ins TransformHandleTypeInterface:$target,
1997                   Variadic<TransformAnyParamTypeOrAnyHandle>:$dynamic_sizes,
1998                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sizes,
1999                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$interchange,
2000                   DefaultValuedOptionalAttr<DenseBoolArrayAttr, "{}">:$scalable_sizes);
2001  let results = (outs TransformHandleTypeInterface:$tiled_linalg_op,
2002                      Variadic<TransformHandleTypeInterface>:$loops);
2003  let builders = [
2004    OpBuilder<(ins "TypeRange":$loopTypes,
2005                   "Value":$target,
2006                   "ArrayRef<int64_t>":$staticTileSizes,
2007                   CArg<"ArrayRef<int64_t>", "{}">:$interchange,
2008                   CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">:
2009                      $scalableSizes)>,
2010    OpBuilder<(ins "TypeRange":$loopTypes,
2011                   "Value":$target,
2012                   "ArrayRef<OpFoldResult>":$mixedTileSizes,
2013                   CArg<"ArrayRef<int64_t>", "{}">:$interchange,
2014                   CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">:
2015                      $scalableSizes)>,
2016    OpBuilder<(ins "Value":$target,
2017                   "ArrayRef<int64_t>":$staticTileSizes,
2018                   CArg<"ArrayRef<int64_t>", "{}">:$interchange,
2019                   CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">:
2020                      $scalableSizes)>,
2021    OpBuilder<(ins "Value":$target,
2022                   "ArrayRef<OpFoldResult>":$mixedTileSizes,
2023                   CArg<"ArrayRef<int64_t>", "{}">:$interchange,
2024                   CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">:
2025                      $scalableSizes)>,
2026  ];
2027
2028  let assemblyFormat = [{
2029    $target
2030      `tile_sizes` custom<DynamicIndexList>(
2031        $dynamic_sizes,
2032        $static_sizes,
2033        $scalable_sizes)
2034      (`interchange` `=` $interchange^)?
2035    attr-dict
2036    `:` functional-type(operands, results)
2037  }];
2038
2039  let hasVerifier = 1;
2040
2041  let extraClassDeclaration = [{
2042    /// Returns the list of tile sizes, which may be static (Attribute) or
2043    /// dynamic (Value).
2044    SmallVector<OpFoldResult> getMixedSizes();
2045  }];
2046}
2047
2048//===----------------------------------------------------------------------===//
2049// TileUsingForallOp
2050//===----------------------------------------------------------------------===//
2051
2052def TileUsingForallOp :
2053    Op<Transform_Dialect, "structured.tile_using_forall",
2054      [AttrSizedOperandSegments,
2055       DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
2056       TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> {
2057  let description = [{
2058    Tile a TilingInterface op to a tiled `scf.forall`.
2059
2060    Tiling is applied by either specifying `num_threads` or `tile_size`. If
2061    `num_threads` is specified, then the tile size for each dimension `i` is
2062    calculated dynamically via `ceilDiv(dimSize[i], num_threads[i])`.
2063    `num_threads` and `tile_size` can be either static index attributes or
2064    operation handles (or a mix thereof). Operation handles must be mapped to
2065    exactly one op that has exactly one result of index type.
2066
2067    Static zero tile sizes indicate that the dimension is not tiled and can be
2068    thought of as tiling by the full size of data.
2069
2070    It is the user's responsibility to ensure that `num_threads/tile_sizes` is
    a valid tiling specification (i.e. that it only tiles parallel dimensions,
2072    e.g. in the Linalg case). If the dimension is not parallelizable, a warning
2073    is issued to notify the user that the generated code is not safe to
2074    parallelize.
2075
2076    If non-empty, the `mapping` is added as an attribute to the
2077    resulting `scf.forall`.
2078
2079    Note: `tile_sizes` and `num_threads` are variadic. Each tile size/number of
2080    threads can be an index attribute or a transform handle that is mapped to
2081    exactly one payload op with exactly one index result.
2082
2083    #### Return modes
2084
2085    This operation ignores ops that do not implement the TilingInterface and
2086    drops them in the return.
2087
2088    If all the operations referred to by the `target` handle tile
2089    successfully, the transform succeeds.
2090    Otherwise the transform produces a silenceable failure.
2091
2092    The two returned handles point to only the subset of successfully produced
2093    tiled operations, which can all be empty.
2094
2095    These two returned handles point to:
2096      - the tiled op that implements TilingInterface,
2097      - the new scf.forall op.
2098
2099    #### Example using `num_threads`
2100
2101    ```
2102    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
2103       : (!transform.any_op) -> !transform.any_op
2104    %3:2 = transform.structured.tile_using_forall %0 num_threads [10, 20]
2105       : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
2106    ```
2107
2108    #### Example using `tile_sizes`
2109
2110    ```
2111    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
2112       : (!transform.any_op) -> !transform.any_op
2113    %sz = transform.structured.match ...
2114    %3:2 = transform.structured.tile_using_forall %0 tile_sizes [0, %sz, 20]
2115       : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
2116    ```
2117  }];
2118
2119  let arguments = (ins TransformHandleTypeInterface:$target,
2120                   Variadic<TransformAnyParamTypeOrAnyHandle>:$num_threads,
2121                   Variadic<TransformAnyParamTypeOrAnyHandle>:$tile_sizes,
2122                   Optional<TransformAnyParamTypeOrAnyHandle>:$packed_num_threads,
2123                   Optional<TransformAnyParamTypeOrAnyHandle>:$packed_tile_sizes,
2124                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_num_threads,
2125                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_tile_sizes,
2126                   OptionalAttr<DeviceMappingArrayAttr>:$mapping);
2127  let results = (outs TransformHandleTypeInterface:$tiled_op,
2128                      TransformHandleTypeInterface:$forall_op);
2129
2130  let builders = [
2131    OpBuilder<(ins "Value":$target,
2132                   "ArrayRef<int64_t>":$staticTileSizes,
2133                   CArg<"::mlir::transform::TileSizesSpec",
2134                        "::mlir::transform::TileSizesSpec()">,
2135                   CArg<"ArrayAttr", "{}">:$mapping)>,
2136    OpBuilder<(ins "Value":$target,
2137                   "ArrayRef<OpFoldResult>":$mixedTileSizes,
2138                   CArg<"::mlir::transform::TileSizesSpec",
2139                        "::mlir::transform::TileSizesSpec()">,
2140                   CArg<"ArrayAttr", "{}">:$mapping)>,
2141    OpBuilder<(ins "Value":$target,
2142                   "ArrayRef<int64_t>":$staticNumThreads,
2143                   CArg<"::mlir::transform::NumThreadsSpec",
2144                        "::mlir::transform::NumThreadsSpec()">,
2145                   CArg<"ArrayAttr", "{}">:$mapping)>,
2146    OpBuilder<(ins "Value":$target,
2147                   "ArrayRef<OpFoldResult>":$mixedNumThreads,
2148                   CArg<"::mlir::transform::NumThreadsSpec",
2149                        "::mlir::transform::NumThreadsSpec()">,
2150                   CArg<"ArrayAttr", "{}">:$mapping)>
2151  ];
2152
2153  let assemblyFormat = [{
2154    $target oilist(
2155        `num_threads` custom<PackedOrDynamicIndexList>($packed_num_threads,
2156                                                       $num_threads,
2157                                                       $static_num_threads) |
2158         `tile_sizes` custom<PackedOrDynamicIndexList>($packed_tile_sizes,
2159                                                       $tile_sizes,
2160                                                       $static_tile_sizes))
2161    (`(` `mapping` `=` $mapping^ `)`)? attr-dict
2162    `:` functional-type(operands, results)
2163  }];
2164  let hasVerifier = 1;
2165
2166  let extraClassDeclaration = [{
2167    ::mlir::DiagnosedSilenceableFailure apply(
2168        ::mlir::transform::TransformRewriter &rewriter,
2169        ::mlir::transform::TransformResults &transformResults,
2170        ::mlir::transform::TransformState &state);
2171
2172    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedNumThreads();
2173    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedTileSizes();
2174  }];
2175}
2176
2177//===----------------------------------------------------------------------===//
2178// VectorizeChildrenAndApplyPatternsOp
2179//===----------------------------------------------------------------------===//
2180
2181def VectorizeChildrenAndApplyPatternsOp :
2182  Op<Transform_Dialect, "structured.vectorize_children_and_apply_patterns",
2183    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2184     TransformEachOpTrait, TransformOpInterface,
2185     ReportTrackingListenerFailuresOpTrait]> {
2186  let description = [{
2187    Vectorizes all children contained in the given `target` using the
2188    configuration specified by the attributes of this op. This only vectorizes
2189    structured ops that operate on shaped types and does not vectorize loops or
    straight-line code. Internally, it applies a set of rewrite patterns, some of
2191    which enable vectorization and some of which clean up the results.
2192    Therefore, it can only be applied to an op with the "isolated from above"
2193    property. This transformation only fails if the entire pattern rewriting
2194    failed, i.e., it does **not** fail when no ops were vectorized.
2195
2196    Finer granularity can be achieved either with the `VectorizeOp` for
2197    individual ops or by outlining the target part of the payload IR into, e.g.,
2198    a function, performing this transformation, and inlining it back.
2199
2200    Note that this transformation invalidates the handles to any payload IR
2201    operation that is contained inside the vectorization target.
2202
2203    This transformation supports the following attributes:
2204    - `vectorize_padding`: a `UnitAttr` to activate the vectorization of
2205      `tensor.pad` ops. Different pipelines may prefer to lower such ops to
2206      loops.
2207    - `disable_multi_reduction_to_contract_patterns`: a `UnitAttr` to deactivate
2208      the rewrite of `vector.multi_reduction` to `vector.contract`. This is
2209      intended to be used in tests only.
2210    - `disable_transfer_permutation_map_lowering_patterns`: a `UnitAttr` to
2211      deactivate the rewrite of `vector.transfer` with permutation maps into
2212      explicit `vector.transpose` operations. This is intended to be used in
2213      tests only but may be promoted to a first class attribute in the future.
2214
2215    #### Return modes:
2216
2217    This operation produces a definite failure if vectorization fails for any
2218    reason.
2219    The operation always returns the handle to the target op that is expected
2220    to be isolated from above.
2221  }];
2222
2223  let arguments = (ins TransformHandleTypeInterface:$target,
2224                   UnitAttr:$vectorize_padding,
2225                   UnitAttr:$vectorize_nd_extract,
2226                   UnitAttr:$flatten_1d_depthwise_conv,
2227                   UnitAttr:$disable_multi_reduction_to_contract_patterns,
2228                   UnitAttr:$disable_transfer_permutation_map_lowering_patterns);
2229  let results = (outs TransformHandleTypeInterface:$transformed);
2230
2231  let assemblyFormat =
2232      "$target attr-dict `:`"
2233      "functional-type(operands, results)";
2234
2235  let builders = [
2236    OpBuilder<(ins "Value":$target,
2237               CArg<"bool", "false">:$vectorizePadding,
2238               CArg<"bool", "false">:$vectorizeNDExtract,
2239               CArg<"bool", "false">:$flatten1DDepthwise)>
2240  ];
2241  let extraClassDeclaration = [{
2242    ::mlir::DiagnosedSilenceableFailure applyToOne(
2243        ::mlir::transform::TransformRewriter &rewriter,
2244        ::mlir::Operation *target,
2245        ::mlir::transform::ApplyToEachResultList &results,
2246        ::mlir::transform::TransformState &state);
2247  }];
2248}
2249
2250def VectorizeOp : Op<Transform_Dialect, "structured.vectorize",
2251    [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
2252     TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> {
2253  let description = [{
2254    Vectorize the target ops, which must be Linalg ops.
2255
2256    Use the optional vector sizes to specify exactly what configuration the
2257    vectorizer should use. It will then use masked vectors of the specified
2258    size to enforce this configuration ("masked vectorization"). If no vector
2259    sizes are specified, the vectorizer will infer the shapes to use from the
2260    target Linalg ops ("regular vectorization"). More specifically:
2261
2262    ```mlir
    // Masked vectorization - vector sizes are specified explicitly
    transform.structured.vectorize %target vector_sizes [1, 4] : !transform.any_op
    // Regular vectorization - vector sizes are inferred from the target Op
2266    transform.structured.vectorize %target : !transform.any_op
2267    ```
2268
2269    The vector sizes can be either static or dynamic (SSA values). In case of
2270    SSA values, the handle must be mapped to exactly one payload op with
2271    exactly one index-typed result.
2272
    Note: The input vector sizes must be greater than or equal to their
2274    counterpart iteration space sizes.
2275
    Typically, this operation should be applied to linalg operations that have
2277    already been tiled to the appropriate sizes.
2278
2279    #### Return modes:
2280
2281    This operation produces a silenceable failure if at least one target op is
2282    not a Linalg op or fails to vectorize. It produces a definite failure if
2283    the dynamic vector sizes (SSA values) do not satisfy the constraints
2284    mentioned above.
2285  }];
2286
2287  let arguments = (ins TransformHandleTypeInterface:$target,
2288                       Variadic<TransformAnyParamTypeOrAnyHandle>:$vector_sizes,
2289                       DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:
2290                          $static_vector_sizes,
2291                       OptionalAttr<UnitAttr>:$vectorize_nd_extract,
2292                       DefaultValuedOptionalAttr<DenseBoolArrayAttr, "{}">:
2293                          $scalable_sizes);
2294
2295  let results = (outs);
2296
2297  // We use oilist here to elide the optional `vector_sizes` when empty list
2298  // is passed.
2299  let assemblyFormat = [{
2300    $target oilist(
2301      `vector_sizes` custom<DynamicIndexList>(
2302        $vector_sizes,
2303        $static_vector_sizes,
2304        $scalable_sizes))
2305    attr-dict
2306    `:` type($target)(`,`type($vector_sizes)^)?
2307  }];
2308
2309  let hasVerifier = 1;
2310
2311  let extraClassDeclaration = [{
2312    // TODO: applyToOne.
2313    ::mlir::DiagnosedSilenceableFailure apply(
2314        ::mlir::transform::TransformRewriter &rewriter,
2315        ::mlir::transform::TransformResults &transformResults,
2316        ::mlir::transform::TransformState &state);
2317
2318    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedVectorSizes();
2319  }];
2320}
2321
2322//===----------------------------------------------------------------------===//
2323// HoistRedundantVectorTransfersOp
2324//===----------------------------------------------------------------------===//
2325
2326def HoistRedundantVectorTransfersOp :
2327  Op<Transform_Dialect, "structured.hoist_redundant_vector_transfers",
2328    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2329     TransformEachOpTrait, TransformOpInterface,
2330     ReportTrackingListenerFailuresOpTrait]> {
2331  let description = [{
2332    Hoist vector.transfer_read / vector.transfer_write pairs out of immediately
2333    enclosing scf::ForOp iteratively, if the following conditions are true:
2334       1. The 2 ops access the same memref with the same indices.
2335       2. All operands are invariant under the enclosing scf::ForOp.
2336       3. No uses of the memref either dominate the transfer_read or are
2337       dominated by the transfer_write (i.e. no aliasing between the write and
2338       the read across the loop)
2339
2340    WARNING: This hoisting does not model parallelism and is generally incorrect
2341    when used on distributed loops with memref semantics!
2342    TODO: obsolete and should be retired.
2343
2344    #### Return modes:
2345
2346    The operation always succeeds and returns a handle to the transformed
2347    function op.
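
    A minimal usage sketch (the `%func` handle is illustrative and must point
    to `func.func` payload ops):

    ```
    %hoisted = transform.structured.hoist_redundant_vector_transfers %func
        : (!transform.any_op) -> !transform.any_op
    ```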
2348  }];
2349
2350  let arguments = (ins TransformHandleTypeInterface:$target,
2351                   UnitAttr:$verify_non_zero_trip);
2352  let results = (outs TransformHandleTypeInterface:$transformed);
2353
2354  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) ";
2355
2356  let builders = [
2357    OpBuilder<(ins "Value":$target,
2358               CArg<"bool", "false">:$verify_non_zero_trip)>,
2359  ];
2360  let extraClassDeclaration = [{
2361    ::mlir::DiagnosedSilenceableFailure applyToOne(
2362         ::mlir::transform::TransformRewriter &rewriter,
2363         ::mlir::func::FuncOp target,
2364         ::mlir::transform::ApplyToEachResultList &results,
2365         ::mlir::transform::TransformState &state);
2366   }];
2367}
2368
2369//===----------------------------------------------------------------------===//
2370// HoistRedundantVectorBroadcastsOp
2371//===----------------------------------------------------------------------===//
2372
2373def HoistRedundantVectorBroadcastsOp :
2374  Op<Transform_Dialect, "structured.hoist_redundant_vector_broadcasts",
2375    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2376     TransformEachOpTrait, TransformOpInterface,
2377     ReportTrackingListenerFailuresOpTrait]> {
2378  let description = [{
    Hoist vector.extract / vector.broadcast pairs out of immediately
2380    enclosing scf::ForOp iteratively.
2381
2382    #### Return modes:
2383
2384    The operation always succeeds and returns a handle to the transformed
2385    function op.
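
    A minimal usage sketch (the `%func` handle is illustrative):

    ```
    %hoisted = transform.structured.hoist_redundant_vector_broadcasts %func
        : (!transform.any_op) -> !transform.any_op
    ```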
2386  }];
2387
2388  let arguments = (ins TransformHandleTypeInterface:$target);
2389  let results = (outs TransformHandleTypeInterface:$transformed);
2390
2391  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) ";
2392
2393  let builders = [
2394    OpBuilder<(ins "Value":$target)>,
2395  ];
2396  let extraClassDeclaration = [{
2397    ::mlir::DiagnosedSilenceableFailure applyToOne(
2398         ::mlir::transform::TransformRewriter &rewriter,
2399         ::mlir::Operation *target,
2400         ::mlir::transform::ApplyToEachResultList &results,
2401         ::mlir::transform::TransformState &state);
2402   }];
2403}
2404
2405//===----------------------------------------------------------------------===//
2406// ConvertConv2DToImg2ColOp
2407//===----------------------------------------------------------------------===//
2408
2409def ConvertConv2DToImg2ColOp : Op<Transform_Dialect,
2410    "structured.convert_conv2d_to_img2col",
2411    [FunctionalStyleTransformOpTrait,
2412     MemoryEffectsOpInterface,
2413     TransformOpInterface,
2414     TransformEachOpTrait,
2415     ReportTrackingListenerFailuresOpTrait]> {
2416  let description = [{
2417    Convert linalg.conv_2d_xxx into linalg.generic (for img2col packing)
2418    and linalg.matmul.
2419
2420    A convolution operation can be written as a matrix-matrix multiplication by
    unfolding the cross-correlation between input and filter and explicitly
    copying overlapped sliding window inputs.
2423
    Consider a 2D input X with a single input and output channel and a 2x2 filter W:
2425    ```
2426    [x(0, 0)  , x(0, 1)  , ...,   x(0, n)  ]
2427    [x(1, 0)  , x(1, 1)  , ...,   x(1, n)  ]
2428    [.        ,  .       ,.   ,      .     ]            [w(0, 0), w(0, 1)]
2429    [.        ,  .       , .  ,      .     ]    (conv)  [w(1, 0), w(1, 1)]
2430    [.        ,  .       ,   .,      .     ]
2431    [x(n-1, 0), x(n-1, 1), ..., x(n-1, n-1)]
2432    ```
2433
2434    The packed input data (img2col) is a matrix with |rows| = output spatial
2435    size and |columns| = filter spatial size. To compute the output Y(i, j) we
2436    calculate the dot product between the filter window at input X(x, y) and the
2437    filter, which looks like the following, where the l.h.s. is the img2col
2438    matrix and the r.h.s. is the flattened filter:
2439    ```
2440    [x(0,0), x(0,1), x(1,0), x(1,1)]
2441    [x(0,1), x(0,2), x(1,1), x(1,2)] (matmul) [w(0,0), w(0,1), w(1,0), w(1,1)]
2442    [x(0,2), x(0,3), x(1,2), x(1,3)]
2443    [   .  ,    .  ,    .  ,    .  ]
2444    ```
2445
2446    In general, for the 2D case with an (N, H, W, C) input, a (Kh, Kw, C, D)
2447    filter, and an (N, Ho, Wo, D) output, the convolution is the matrix-matrix
2448    multiplication (Ho x Wo, Kh x Kw x C) * (Kh x Kw x C, D) for each of the N
2449    inputs. For the case where N > 1 it is a batched matrix-matrix
2450    multiplication.
2451
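    For intuition only, the following is a minimal NumPy sketch of this packing
    for a single-channel input with unit stride and no padding; the function and
    variable names are illustrative assumptions, not part of this op:

    ```python
    import numpy as np

    # Minimal img2col sketch: single channel, unit stride, no padding.
    def conv2d_via_img2col(x, w):
        H, W = x.shape
        Kh, Kw = w.shape
        Ho, Wo = H - Kh + 1, W - Kw + 1
        # One row per output position, one column per filter element.
        cols = np.array([x[i:i + Kh, j:j + Kw].ravel()
                         for i in range(Ho) for j in range(Wo)])
        # The convolution is now a plain matrix product with the flattened filter.
        return (cols @ w.ravel()).reshape(Ho, Wo)

    x, w = np.random.rand(5, 5), np.random.rand(2, 2)
    ref = np.array([[np.sum(x[i:i + 2, j:j + 2] * w) for j in range(4)]
                    for i in range(4)])
    assert np.allclose(conv2d_via_img2col(x, w), ref)
    ```
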
2452    Returns two handles:
2453    - One on the operation that produces the img2col tensor.
2454    - One on the final operation of the sequence that replaces the original
2455      convolution.
2456
2457    #### Return modes:
2458
2459    Returns a definite failure if target is not isolated from above.
2460    Returns a silenceable failure if the pattern application failed.
2461  }];
2462
2463  let arguments = (ins TransformHandleTypeInterface:$target);
2464  let results = (outs TransformHandleTypeInterface:$img2col_tensor,
2465                      TransformHandleTypeInterface:$transformed);
2466
2467  let assemblyFormat =
2468    "$target attr-dict `:` functional-type($target, results)";
2469
2470  let builders = [
2471    OpBuilder<(ins "Value":$target)>
2472  ];
2473
2474  let extraClassDeclaration = [{
2475    ::mlir::DiagnosedSilenceableFailure applyToOne(
2476        ::mlir::transform::TransformRewriter &rewriter,
2477        ::mlir::linalg::LinalgOp target,
2478        ::mlir::transform::ApplyToEachResultList &results,
2479        ::mlir::transform::TransformState &state);
2480  }];
2481}
2482
2483//===----------------------------------------------------------------------===//
2484// FlattenElementwiseLinalgOp
2485//===----------------------------------------------------------------------===//
2486
2487def FlattenElementwiseLinalgOp : Op<Transform_Dialect,
2488    "structured.flatten_elementwise",
2489    [FunctionalStyleTransformOpTrait,
2490     MemoryEffectsOpInterface,
2491     TransformOpInterface,
2492     TransformEachOpTrait,
2493     ReportTrackingListenerFailuresOpTrait]> {
2494  let description = [{
2495    Flattens the iteration space and, where applicable, the operands of
2496    elementwise linalg ops to a single dimension.
2497
2498    Returns one handle:
2499    - Flattened linalg operation.
2500
2501    #### Return modes:
2502
2503    Returns a definite failure if target is not isolated from above.
2504    Returns a silenceable failure if the pattern application failed.
2505  }];
2506
2507  let arguments = (ins TransformHandleTypeInterface:$target);
2508  let results = (outs TransformHandleTypeInterface:$transformed);
2509
2510  let assemblyFormat =
2511    "$target attr-dict `:` functional-type($target, results)";
2512
2513  let builders = [
2514    OpBuilder<(ins "Value":$target)>
2515  ];
2516
2517  let extraClassDeclaration = [{
2518    ::mlir::DiagnosedSilenceableFailure applyToOne(
2519        ::mlir::transform::TransformRewriter &rewriter,
2520        ::mlir::linalg::LinalgOp target,
2521        ::mlir::transform::ApplyToEachResultList &results,
2522        ::mlir::transform::TransformState &state);
2523  }];
2524}
2525
2526//===----------------------------------------------------------------------===//
2527// Transpose Conv2D
2528//===----------------------------------------------------------------------===//
2529
2530def TransposeConv2DOp : Op<Transform_Dialect,
2531    "structured.transpose_conv2d",
2532    [FunctionalStyleTransformOpTrait,
2533     MemoryEffectsOpInterface,
2534     TransformOpInterface,
2535     TransformEachOpTrait,
2536     ReportTrackingListenerFailuresOpTrait]> {
2537  let description = [{
2538    Convert linalg.conv_2d_nhwc_fhwc into linalg.conv_2d_nhwc_hwcf by introducing
2539    a linalg.transpose on the filter tensor/memref.
2540
2541    Whilst the fhwc filter channel ordering can be desirable for certain targets,
2542    and is a more direct mapping to higher-level dialects such as TOSA (which only
2543    supports this ordering), hwcf is better suited for transformations such as
2544    img2col, which can make use of optimized BLAS routines such as GEMM.
2545
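    For intuition, a minimal NumPy sketch of the layout change (the shapes and
    names below are assumptions for illustration only): the filter is merely
    transposed, so the convolution it feeds computes the same result.

    ```python
    import numpy as np

    # Hypothetical filter: F=8 output channels, 3x3 kernel, C=4 input channels.
    w_fhwc = np.random.rand(8, 3, 3, 4)          # (F, H, W, C) layout
    w_hwcf = np.transpose(w_fhwc, (1, 2, 3, 0))  # (H, W, C, F) layout
    assert w_hwcf.shape == (3, 3, 4, 8)
    ```
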
2546    Returns one handle:
2547    - The final operation of the sequence that replaces the original
2548      convolution.
2549
2550    #### Return modes:
2551
2552    Returns a definite failure if target is not isolated from above.
2553    Returns a silenceable failure if the pattern application failed.
2554  }];
2555
2556  let arguments = (ins TransformHandleTypeInterface:$target);
2557  let results = (outs TransformHandleTypeInterface:$transformed);
2558
2559  let assemblyFormat =
2560    "$target attr-dict `:` functional-type($target, results)";
2561
2562  let builders = [
2563    OpBuilder<(ins "Value":$target)>
2564  ];
2565
2566  let extraClassDeclaration = [{
2567    ::mlir::DiagnosedSilenceableFailure applyToOne(
2568        ::mlir::transform::TransformRewriter &rewriter,
2569        ::mlir::linalg::LinalgOp target,
2570        ::mlir::transform::ApplyToEachResultList &results,
2571        ::mlir::transform::TransformState &state);
2572  }];
2573}
2574
2575//===----------------------------------------------------------------------===//
2576// TransposeMatmulOp
2577//===----------------------------------------------------------------------===//
2578
2579def TransposeMatmulOp : Op<Transform_Dialect,
2580    "structured.transpose_matmul",
2581    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2582     TransformOpInterface, TransformEachOpTrait,
2583     ReportTrackingListenerFailuresOpTrait]> {
2584  let description = [{
2585    Convert Linalg matmul ops to transposed variants.
2586
2587    By default the LHS matrix is transposed. Specify `<rhs>` to instead
2588    transpose the RHS matrix.
2589
2590    #### Return modes:
2591
2592    This operation fails if `target` is unsupported, i.e., not a
2593    `linalg.matmul` or `linalg.batch_matmul`. Otherwise, the operation succeeds
2594    and returns a handle to the transposed matmul op.
2595  }];
2596
2597  let arguments = (ins
2598    TransformHandleTypeInterface:$target,
2599    DefaultValuedAttr<TransposeMatmulInput,
2600                      "TransposeMatmulInput::lhs">:$inputToTranspose);
2601  let results = (outs TransformHandleTypeInterface:$transformed);
2602
2603  let assemblyFormat = [{
2604    $target (`<` $inputToTranspose^ `>`)?
2605    attr-dict `:` functional-type($target, results)
2606  }];
2607
2608  let builders = [
2609    OpBuilder<(ins "Value":$target)>
2610  ];
2611
2612  let extraClassDeclaration = [{
2613    ::mlir::DiagnosedSilenceableFailure applyToOne(
2614        ::mlir::transform::TransformRewriter &rewriter,
2615        ::mlir::linalg::LinalgOp target,
2616        ::mlir::transform::ApplyToEachResultList &results,
2617        ::mlir::transform::TransformState &state);
2618  }];
2619}
2620
2621//===----------------------------------------------------------------------===//
2622// InsertSliceToCopyOp
2623//===----------------------------------------------------------------------===//
2624
2625def InsertSliceToCopyOp :
2626  Op<Transform_Dialect, "structured.insert_slice_to_copy",
2627    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2628     TransformEachOpTrait, TransformOpInterface]> {
2629  let description = [{
2630    Targeted rewrite of a tensor.insert_slice into a linalg.copy.
2631    This is useful to materialize copies explicitly before bufferization and
2632    transform them, avoiding the need to rediscover them after bufferization.
2633
2634    If the insert_slice source is already a linalg.copy, only return the source
2635    op (i.e. do not create an additional linalg.copy op).
2636
2637    #### Return modes:
2638
2639    The operation always succeeds and returns a handle to the relevant
2640    linalg.copy op.
2641  }];
2642
2643  let arguments = (ins TransformHandleTypeInterface:$target);
2644  let results = (outs TransformHandleTypeInterface:$transformed);
2645
2646  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
2647
2648  let builders = [
2649    OpBuilder<(ins "Value":$target)>,
2650  ];
2651  let extraClassDeclaration = [{
2652    ::mlir::DiagnosedSilenceableFailure applyToOne(
2653        ::mlir::transform::TransformRewriter &rewriter,
2654        ::mlir::Operation *target,
2655        ::mlir::transform::ApplyToEachResultList &results,
2656        ::mlir::transform::TransformState &state);
2657  }];
2658}
2659
2660//===----------------------------------------------------------------------===//
2661// MapCopyToThreadsOp
2662//===----------------------------------------------------------------------===//
2663
2664def MapCopyToThreadsOp :
2665  Op<Transform_Dialect, "structured.gpu.map_copy_to_threads",
2666    [FunctionalStyleTransformOpTrait,
2667     MemoryEffectsOpInterface,
2668     TransformEachOpTrait,
2669     TransformOpInterface]> {
2670  let description = [{
2671    Targeted mapping of a linalg.copy / tensor.pad operation on tensors to a GPU
2672    thread mapping.
2673
2674    This operation implements a greedy heuristic that determines a good
2675    distribution of threads over which to break down the copy/pad operation.
2676    The heuristic is driven by considerations related to the underlying
2677    architecture, for which good high-level decisions are needed assuming
2678    certain hardware features. Relevant features are exposed via first-class
2679    attributes that control the behavior of the transformation at a high level.
2680
2681    For now, a single heuristic is implemented and can be extended on a per-need
2682    basis.
2683
2684    #### Return modes:
2685
2686    This operation fails definitely if there is an unsupported op (i.e., not
2687    linalg.copy / tensor.pad) among the targeted ops. Otherwise, the operation
2688    always succeeds and returns a handle to the relevant tiled linalg.copy /
2689    tensor.pad op and the enclosing scf.forall op.
2690  }];
2691
2692  let arguments = (ins TransformHandleTypeInterface:$target,
2693                       I64Attr:$total_num_threads,
2694                       I64Attr:$desired_bit_alignment);
2695  let results = (outs TransformHandleTypeInterface:$forall_op,
2696                      TransformHandleTypeInterface:$tiled_op);
2697
2698  let assemblyFormat = [{
2699    $target
2700    `total_num_threads` `=` $total_num_threads
2701    `desired_bit_alignment` `=` $desired_bit_alignment
2702    attr-dict
2703    `:` functional-type(operands, results)
2704  }];
2705
2706  let builders = [
2707    OpBuilder<(ins "Value":$target)>,
2708  ];
2709  let extraClassDeclaration = [{
2710    ::mlir::DiagnosedSilenceableFailure applyToOne(
2711        ::mlir::transform::TransformRewriter &rewriter,
2712        ::mlir::Operation *target,
2713        ::mlir::transform::ApplyToEachResultList &results,
2714        ::mlir::transform::TransformState &state);
2715
2716    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedNumThreads();
2717  }];
2718}
2719
2720//===----------------------------------------------------------------------===//
2721// Winograd Conv2D
2722//===----------------------------------------------------------------------===//
2723
2724def WinogradConv2DOp : Op<Transform_Dialect,
2725    "structured.winograd_conv2d",
2726    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2727     TransformOpInterface, TransformEachOpTrait,
2728     ReportTrackingListenerFailuresOpTrait]> {
2729  let description = [{
2730    The Winograd Conv2D algorithm converts a linalg Conv2D operation into a
2731    batched matrix multiply. Before the matrix multiply, it converts the filter
2732    and input into a format suitable for the batched matrix multiply. After the
2733    matrix multiply, it converts the output into the final result tensor.
2734
2735    The algorithm F(m x m, r x r) is
2736
2737    Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
2738
2739    where x denotes matrix multiplication and @ the element-wise product. The
2740    output Y is m x m, the filter g is r x r, and the input d is (m + r - 1) x
2741    (m + r - 1). A^T, A, G^T, G, B^T, and B are transformation matrices.
2742
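    As a concrete illustration (a sketch only, using the standard F(2x2, 3x3)
    transformation matrices from Lavin & Gray; the constants and names below are
    assumptions for this example, not taken from the implementation):

    ```python
    import numpy as np

    # Standard F(2x2, 3x3) transformation matrices (assumed for illustration).
    BT = np.array([[1,  0, -1,  0],
                   [0,  1,  1,  0],
                   [0, -1,  1,  0],
                   [0,  1,  0, -1]], dtype=float)
    G = np.array([[1.0,  0.0, 0.0],
                  [0.5,  0.5, 0.5],
                  [0.5, -0.5, 0.5],
                  [0.0,  0.0, 1.0]])
    AT = np.array([[1, 1,  1,  0],
                   [0, 1, -1, -1]], dtype=float)

    g = np.random.rand(3, 3)  # r x r filter
    d = np.random.rand(4, 4)  # (m + r - 1) x (m + r - 1) input tile

    U = G @ g @ G.T           # transformed filter
    V = BT @ d @ BT.T         # transformed input tile
    Y = AT @ (U * V) @ AT.T   # m x m output tile; (U * V) is element-wise

    # Reference: direct 2D cross-correlation of the tile with the filter.
    ref = np.array([[np.sum(d[i:i + 3, j:j + 3] * g) for j in range(2)]
                    for i in range(2)])
    assert np.allclose(Y, ref)
    ```
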
2743    #### Return modes:
2744
2745    This operation produces a silenceable failure if `target` is unsupported.
2746    Otherwise, the operation succeeds and returns a handle to the sequence that
2747    replaces the original convolution.
2748  }];
2749
2750  let arguments = (ins TransformHandleTypeInterface:$target,
2751                       I64Attr:$m,
2752                       I64Attr:$r);
2753  let results = (outs TransformHandleTypeInterface:$transformed);
2754
2755  let assemblyFormat =
2756    "$target attr-dict `:` functional-type($target, results)";
2757
2758  let builders = [
2759    OpBuilder<(ins "Value":$target)>
2760  ];
2761
2762  let extraClassDeclaration = [{
2763    ::mlir::DiagnosedSilenceableFailure applyToOne(
2764        ::mlir::transform::TransformRewriter &rewriter,
2765        ::mlir::linalg::LinalgOp target,
2766        ::mlir::transform::ApplyToEachResultList &results,
2767        ::mlir::transform::TransformState &state);
2768  }];
2769}
2770
2771def DecomposeWinogradOp : Op<Transform_Dialect,
2772    "structured.decompose_winograd_op",
2773    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
2774     TransformOpInterface, TransformEachOpTrait,
2775     ReportTrackingListenerFailuresOpTrait]> {
2776  let description = [{
2777    Decompose Winograd operations. This converts the filter, input, and output
2778    transform operations into a combination of equivalent scf, tensor, and
2779    linalg operations. Before applying this transform op, users need to tile
2780    the Winograd transform operations into supported sizes.
2781
2782    #### Return modes:
2783
2784    This operation fails if `target` is unsupported. Otherwise, the operation
2785    succeeds and returns a handle of the sequence that replaces the original
2786    operations.
2787  }];
2788
2789  let arguments = (ins TransformHandleTypeInterface:$target);
2790  let results = (outs TransformHandleTypeInterface:$transformed);
2791
2792  let assemblyFormat =
2793    "$target attr-dict `:` functional-type($target, results)";
2794
2795  let builders = [
2796    OpBuilder<(ins "Value":$target)>
2797  ];
2798
2799  let extraClassDeclaration = [{
2800    ::mlir::DiagnosedSilenceableFailure applyToOne(
2801        ::mlir::transform::TransformRewriter &rewriter,
2802        ::mlir::Operation *target,
2803        ::mlir::transform::ApplyToEachResultList &results,
2804        ::mlir::transform::TransformState &state);
2805  }];
2806}
2807
2808#endif // LINALG_TRANSFORM_OPS
2809