1//===- LinalgTransformOps.td - Linalg transform ops --------*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#ifndef LINALG_TRANSFORM_OPS 10#define LINALG_TRANSFORM_OPS 11 12include "mlir/Dialect/Linalg/TransformOps/LinalgTransformEnums.td" 13include "mlir/Dialect/Transform/IR/TransformAttrs.td" 14include "mlir/Dialect/Transform/IR/TransformDialect.td" 15include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" 16include "mlir/Dialect/Transform/IR/TransformTypes.td" 17include "mlir/Dialect/SCF/IR/DeviceMappingInterface.td" 18include "mlir/Interfaces/SideEffectInterfaces.td" 19include "mlir/IR/OpBase.td" 20include "mlir/IR/RegionKindInterface.td" 21 22// This is roughly similar to OpFoldResult assuming the handle produces a single 23// value in the payload IR. 24def TransformAnyParamTypeOrAnyHandle : Type< 25 Or<[TransformHandleTypeInterface.predicate, 26 TransformParamTypeInterface.predicate]>, 27 "transform any param type or any handle type">; 28 29//===----------------------------------------------------------------------===// 30// Apply...PatternsOp 31//===----------------------------------------------------------------------===// 32 33def ApplyEraseUnnecessaryInputsPatternsOp : Op<Transform_Dialect, 34 "apply_patterns.linalg.erase_unnecessary_inputs", 35 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 36 let description = [{ 37 Collects patterns that promote inputs to outputs and remove unused inputs of 38 `linalg.generic` ops. 
39 }]; 40 41 let assemblyFormat = "attr-dict"; 42} 43 44def ApplyDecomposeTensorPackUnpackPatternsOp 45 : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pack_unpack", 46 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 47 let description = [{ 48 Collect patterns to decompose tensor.pack and tensor.unpack into e.g. 49 tensor::PadOp, linalg::TransposeOp Ops. Requires all outer dims to be unit. 50 }]; 51 52 let assemblyFormat = "attr-dict"; 53} 54 55def ApplyDecomposeTensorPadPatternsOp 56 : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pad", 57 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 58 let description = [{ 59 Collect patterns to decompose tensor.pad into e.g. tensor::EmptyOp, 60 linalg::FillOp and tensor::InsertSliceOp. 61 }]; 62 63 let assemblyFormat = "attr-dict"; 64} 65 66def ApplyFoldUnitExtentDimsViaReshapesPatternsOp : Op<Transform_Dialect, 67 "apply_patterns.linalg.fold_unit_extent_dims_via_reshapes", 68 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 69 let description = [{ 70 Collects patterns to fold unit-extent dimensions in operands/results of 71 linalg ops on tensors via reassociative reshape ops. 72 }]; 73 74 let assemblyFormat = "attr-dict"; 75} 76 77def ApplyFoldUnitExtentDimsViaSlicesPatternsOp : Op<Transform_Dialect, 78 "apply_patterns.linalg.fold_unit_extent_dims_via_slices", 79 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 80 let description = [{ 81 Collects patterns to fold unit-extent dimensions in operands/results of 82 linalg ops on tensors via rank-reducing slices. 83 }]; 84 85 let assemblyFormat = "attr-dict"; 86} 87 88def ApplyTilingCanonicalizationPatternsOp : Op<Transform_Dialect, 89 "apply_patterns.linalg.tiling_canonicalization", 90 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 91 let description = [{ 92 Collects canonicalization patterns relevant to apply after tiling patterns. 
93 }]; 94 95 let assemblyFormat = "attr-dict"; 96} 97 98def ApplyFoldAddIntoDestPatternsOp : Op<Transform_Dialect, 99 "apply_patterns.linalg.fold_add_into_dest", 100 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 101 let description = [{ 102 Collects patterns to replace linalg.add when destination passing suffices 103 for achieving the sum. 104 }]; 105 106 let assemblyFormat = "attr-dict"; 107} 108 109def ApplyPadVectorizationPatternsOp : Op<Transform_Dialect, 110 "apply_patterns.linalg.pad_vectorization", 111 [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { 112 let description = [{ 113 Apply patterns that vectorize tensor.pad. 114 115 These patterns rewrite tensor.pad Ops using vector.transfer_read and 116 vector.transfer_write operations. This is done either by: 117 1. Folding tensor.pad with an existing vector.transfer_read / 118 vector.transfer_write Op (generated prior to running these patterns). 119 2. Rewriting it (when matched together with a tensor.insert_slice 120 consumer Op) as a vector.transfer_read + vector.transfer_write pair. 121 122 In both cases, these patterns look at producers and consumers for the 123 matched tensor.pad Op to find opportunities for vectorization. 124 }]; 125 126 let assemblyFormat = "attr-dict"; 127} 128 129//===----------------------------------------------------------------------===// 130// BufferizeToAllocationOp 131//===----------------------------------------------------------------------===// 132 133def BufferizeToAllocationOp : Op<Transform_Dialect, 134 "structured.bufferize_to_allocation", 135 [DeclareOpInterfaceMethods<TransformOpInterface>, 136 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 137 ReportTrackingListenerFailuresOpTrait]> { 138 let description = [{ 139 This transform bufferizes the targeted operation and materializes the 140 result in a new allocation. 
It replaces all original uses of the target 141 result with the newly allocated buffer, wrapped in a 142 `bufferization.to_tensor` op. It returns a handle to the newly allocated 143 buffer. Furthermore, it returns a handle that is mapped to all newly created 144 ops. 145 146 Only bufferizable ops that bufferize to a memory write or have an 147 aliasing OpOperand (and do not themselves bufferize to an allocation) are 148 supported. They are bufferized using their BufferizableOpInterface 149 implementation. E.g.: 150 151 ``` 152 %0 = tensor.insert %f into %dest[%pos] : tensor<10xf32> 153 ``` 154 155 Is bufferized to: 156 157 ``` 158 %alloc = memref.alloc() : memref<10xf32> 159 bufferization.materialize_in_destination %dest in %alloc 160 memref.store %f, %alloc[%pos] : memref<10xf32> 161 %0 = bufferization.to_tensor %alloc restrict writable : memref<10xf32> 162 ``` 163 164 Selected ops that bufferize to an allocation (or need special handling) are 165 also supported: 166 - `tensor.pad` is lowered to an allocation, followed by a `linalg.fill` and 167 a buffer copy (all on memrefs). 168 - `vector.mask` is bufferized together with its region. The allocation is 169 placed in front of the `vector.mask` op. 170 171 An optional memory space attribute can be specified for the materialized 172 buffer allocation. 173 174 If a memory copy is needed, a "bufferization.materialize_in_destination" is 175 used when possible. This is an op with tensor semantics that will bufferize 176 to a memory copy later. Which concrete op will be used for the memory copy 177 is up to the bufferization framework. Alternatively, a custom memcpy op can 178 be specified via `memcpy_op`. Currently supported are "memref.copy" and 179 "linalg.copy". In that case, the source of each memcpy must not have a 180 custom memory space. Furthermore, because the future buffer layout is unknown 181 for a given tensor, a fully dynamic layout is assumed for best 182 compatibility. 
Users should use "bufferization.materialize_in_destination" 183 when possible. 184 185 "memref.alloc" is used for new buffer allocations. The buffer is deallocated 186 at the end of the block if the "emit_dealloc" attribute is present. If this 187 attribute is not present, the allocated memory will be leaked. However, 188 running the `-buffer-deallocation-pipeline` after all bufferization is done 189 will properly insert the corresponding deallocation(s). Custom allocation 190 ops can be specified via `alloc_op`. Currently supported are "memref.alloc" 191 and "memref.alloca". In case of a "memref.alloca", the buffer is not 192 deallocated. 193 194 If `bufferize_destination_only` is set, only the destination operands of the 195 op are bufferized to a new memory allocation, but not the op itself. 196 197 #### Return modes 198 199 This operation consumes the `target` handle and produces the 200 `allocated_buffer` and `new_ops` handles. It always succeeds. 201 }]; 202 203 let arguments = (ins TransformHandleTypeInterface:$target, 204 OptionalAttr<AnyAttr>:$memory_space, 205 DefaultValuedAttr<StrAttr, 206 "\"bufferization.materialize_in_destination\"">: 207 $memcpy_op, 208 DefaultValuedAttr<StrAttr, "\"memref.alloc\"">: 209 $alloc_op, 210 UnitAttr:$bufferize_destination_only, 211 UnitAttr:$emit_dealloc); 212 let results = (outs Transform_AnyValue:$allocated_buffer, 213 Transform_AnyOpType:$new_ops); 214 let assemblyFormat = "$target attr-dict `:` type($target)"; 215 let hasVerifier = 1; 216 217 let builders = [ 218 OpBuilder<(ins "Value":$target, "Attribute":$memorySpace)>, 219 OpBuilder<(ins "Value":$target, "int64_t":$memorySpace)> 220 ]; 221} 222 223//===----------------------------------------------------------------------===// 224// DecomposeOp 225//===----------------------------------------------------------------------===// 226 227def DecomposeOp : Op<Transform_Dialect, "structured.decompose", 228 [FunctionalStyleTransformOpTrait, 229 MemoryEffectsOpInterface, 
230 TransformOpInterface, 231 TransformEachOpTrait, 232 ReportTrackingListenerFailuresOpTrait]> { 233 let description = [{ 234 Decomposes named complex operations, such as higher-dimensional 235 (depthwise) convolutions, into combinations of lower-dimensional equivalents 236 when possible. 237 238 #### Return modes 239 240 This operation ignores non-Linalg ops and drops them in the return. 241 If all the operations referred to by the `target` handle decompose 242 properly, the transform succeeds. Otherwise the transform produces a 243 silenceable failure. The return handle points to only the subset of 244 successfully produced computational operations, which can be empty. 245 }]; 246 247 let arguments = (ins TransformHandleTypeInterface:$target); 248 let results = (outs TransformHandleTypeInterface:$transformed); 249 let assemblyFormat = 250 "$target attr-dict `:` functional-type(operands, results)"; 251 252 let extraClassDeclaration = [{ 253 ::mlir::DiagnosedSilenceableFailure applyToOne( 254 ::mlir::transform::TransformRewriter &rewriter, 255 ::mlir::linalg::LinalgOp target, 256 ::mlir::transform::ApplyToEachResultList &results, 257 ::mlir::transform::TransformState &state); 258 }]; 259} 260 261//===----------------------------------------------------------------------===// 262// EliminateLinalgOpAnchoredEmptyTensorsOp 263//===----------------------------------------------------------------------===// 264 265def EliminateLinalgOpAnchoredEmptyTensorsOp 266 : Op<Transform_Dialect, "structured.eliminate_empty_tensors", 267 [DeclareOpInterfaceMethods<TransformOpInterface>, 268 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> { 269 let description = [{ 270 Try to eliminate all `tensor.empty` op uses that are anchored on a LinalgOp 271 within the targeted op. 272 273 This op is similar to `bufferization.eliminate_empty_tensors`, but specific 274 to LinalgOps. 275 276 `tensor.empty` ops cannot be bufferized. 
They can either be converted to 277 `bufferization.alloc_tensor` or replaced with another tensor (via this 278 transform). `tensor.empty` does not specify the contents of the returned 279 tensor so their results can be replaced with arbitrary tensor values as long 280 as the dimensions match. 281 282 This transform looks for `tensor.empty` ops where the SSA use-def chain of 283 the result ends in a supported LinalgOp (always following the aliasing 284 OpOperand/OpResult chain). The following LinalgOps are supported: 285 - Only parallel iterator types. 286 - The use-def chain ends in an input operand of the LinalgOp. 287 - The LinalgOp has an unused output operand with the same shape and 288 indexing map. 289 290 Example: 291 292 ``` 293 %0 = tensor.empty() 294 %1 = linalg.matmul ins(...) outs(%0) 295 %2 = linalg.generic ins(%1) outs(%dest) { 296 ^bb0(%in: f32, %out: f32): 297 // out not used 298 } 299 ``` 300 301 Is rewritten with: 302 ``` 303 %0 = tensor.empty() 304 %1 = linalg.matmul ins(...) outs(%dest) 305 %2 = linalg.generic ins(%0) outs(%1) { 306 ^bb0(%in: f32, %out: f32): 307 // Use %out instead of %in 308 } 309 ``` 310 311 After this transformation, the "ins" operand has no uses inside the body of 312 the LinalgOp and can be folded away with existing cleanup patterns. 313 Afterwards, the tensor::EmptyOp can also fold away, so that the example can 314 bufferize without an allocation (in the absence of other conflicts). 315 316 #### Return modes 317 318 This transform reads the target handle and modifies the payload. It does 319 not produce any handle. 
320 }]; 321 322 let arguments = (ins TransformHandleTypeInterface:$target); 323 324 let results = (outs); 325 326 let assemblyFormat = "$target attr-dict `:` type($target)"; 327} 328 329//===----------------------------------------------------------------------===// 330// FuseOp 331//===----------------------------------------------------------------------===// 332 333def FuseOp : Op<Transform_Dialect, "structured.fuse", 334 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 335 DeclareOpInterfaceMethods<TransformOpInterface>, 336 ReportTrackingListenerFailuresOpTrait]> { 337 let description = [{ 338 Tiles the operations pointed to by the target handle and fuses their 339 producers greedily using the options provided as attributes. 340 341 If `apply_cleanup` is true then slice canonicalization is applied between 342 fusion steps. 343 }]; 344 345 let arguments = 346 (ins TransformHandleTypeInterface:$target, 347 DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_sizes, 348 DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_interchange, 349 DefaultValuedAttr<BoolAttr, "false">:$apply_cleanup); 350 let results = (outs TransformHandleTypeInterface:$transformed, 351 Variadic<TransformHandleTypeInterface>:$loops); 352 353 let assemblyFormat = [{ 354 $target ($tile_sizes^)? (`interchange` $tile_interchange^)? 355 (`apply_cleanup` `=` $apply_cleanup^)? 
attr-dict 356 `:` functional-type(operands, results) 357 }]; 358 let hasVerifier = 1; 359} 360 361//===----------------------------------------------------------------------===// 362// FuseIntoContainingOp 363//===----------------------------------------------------------------------===// 364 365def FuseIntoContainingOp : 366 Op<Transform_Dialect, "structured.fuse_into_containing_op", 367 [DeclareOpInterfaceMethods<TransformOpInterface, 368 ["allowsRepeatedHandleOperands"]>, 369 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 370 ReportTrackingListenerFailuresOpTrait]> { 371 let summary = "Fuse a producer into a containing operation."; 372 373 let description = [{ 374 Fuses the `producer_op` into the `containing_op`. 375 Returns a handle to the fused ops and the `new_containing_op`. 376 377 The producer is typically a slice of a tileable op (i.e., implements 378 TilingInterface). In that case, this transform computes the accessed 379 producer slice inside of the containing op ("tile and fuse") and if required, 380 creates a new containing op with outputs from the fused producer. Otherwise, 381 the entire producer is cloned inside the containing op ("clone and fuse"). 382 383 The containing op handle must be associated with exactly one payload op. The 384 producer op handle may be associated with multiple payload ops. This 385 transform fuses producers one-by-one, always picking an unspecified producer 386 that has at least one use inside the containing op among the 387 producers. A producer can be listed multiple times in the handle. 388 389 Note: If a producer has multiple uses inside the containing op, it is 390 currently tiled and/or cloned multiple times into the containing op. 391 TODO: Reuse already fused OpResults instead of tiling/cloning a second time 392 when possible. Fuse producers according to a topological sorting to achieve 393 the largest amount of reuse. 
394 395 #### Return modes 396 397 If at least one producer could not be fused, this operation produces a 398 silenceable failure. This is the case when tiling fails or when no 399 producer op could be found among the remaining producers that has at least 400 one use within the containing op. I.e., "producers" that are not consumed 401 within the containing op are rejected by this operation. 402 403 This operation consumes the producer handle. 404 This operation only reads the containing op handle. 405 }]; 406 407 let arguments = (ins TransformHandleTypeInterface:$producer_op, 408 TransformHandleTypeInterface:$containing_op); 409 let results = (outs TransformHandleTypeInterface:$fused_op, 410 TransformHandleTypeInterface:$new_containing_op); 411 let assemblyFormat = "$producer_op `into` $containing_op attr-dict " 412 " `:` functional-type(operands, results)"; 413 414 let builders = [ 415 OpBuilder<(ins "Value":$producerOp, "Value":$containingOp)> 416 ]; 417} 418 419//===----------------------------------------------------------------------===// 420// GeneralizeOp 421//===----------------------------------------------------------------------===// 422 423def GeneralizeOp : Op<Transform_Dialect, "structured.generalize", 424 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 425 TransformOpInterface, TransformEachOpTrait, 426 ReportTrackingListenerFailuresOpTrait]> { 427 let description = [{ 428 Transforms a named structured operation into the generic form with the 429 explicit attached region. 430 431 #### Return modes 432 433 This operation ignores non-Linalg ops and drops them in the return. 434 If all the operations referred to by the `target` handle generalize 435 properly, the transform succeeds. Otherwise the transform produces a 436 silenceable failure. The return handle points to only the subset of 437 successfully produced equivalent generic operations, which can be empty or 438 contain the original ops if they were already in generic form. 
439 }]; 440 441 let arguments = (ins TransformHandleTypeInterface:$target); 442 let results = (outs TransformHandleTypeInterface:$transformed); 443 let assemblyFormat = [{ 444 $target attr-dict `:` 445 custom<SemiFunctionType>(type($target), type($transformed), "false") 446 }]; 447 448 let extraClassDeclaration = [{ 449 ::mlir::DiagnosedSilenceableFailure applyToOne( 450 ::mlir::transform::TransformRewriter &rewriter, 451 ::mlir::linalg::LinalgOp target, 452 ::mlir::transform::ApplyToEachResultList &results, 453 ::mlir::transform::TransformState &state); 454 }]; 455} 456 457//===----------------------------------------------------------------------===// 458// SpecializeOp 459//===----------------------------------------------------------------------===// 460 461def SpecializeOp : Op<Transform_Dialect, "structured.specialize", 462 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 463 TransformOpInterface, TransformEachOpTrait, 464 ReportTrackingListenerFailuresOpTrait]> { 465 let description = [{ 466 Transforms a generic operation into the equivalent named form. 467 468 #### Return modes 469 470 This operation ignores non-Linalg ops and drops them in the return. If all 471 the operations referred to by the `target` handle specialize, the transform 472 succeeds; otherwise, the operation produces a silenceable failure. The return 473 handle points to only the subset of successfully produced equivalent named 474 operations, which can be empty or contain the original ops if they were already 475 in named form. The supported specialization to named Linalg operations are: 476 - linalg.copy of any rank. 
477 }]; 478 479 let arguments = (ins TransformHandleTypeInterface:$target); 480 let results = (outs TransformHandleTypeInterface:$transformed); 481 let assemblyFormat = [{ 482 $target attr-dict `:` 483 custom<SemiFunctionType>(type($target), type($transformed), "false") 484 }]; 485 486 let extraClassDeclaration = [{ 487 ::mlir::DiagnosedSilenceableFailure applyToOne( 488 ::mlir::transform::TransformRewriter &rewriter, 489 ::mlir::linalg::LinalgOp target, 490 ::mlir::transform::ApplyToEachResultList &results, 491 ::mlir::transform::TransformState &state); 492 }]; 493} 494 495//===----------------------------------------------------------------------===// 496// InterchangeOp 497//===----------------------------------------------------------------------===// 498 499def InterchangeOp : Op<Transform_Dialect, "structured.interchange", 500 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 501 TransformOpInterface, TransformEachOpTrait, 502 ReportTrackingListenerFailuresOpTrait]> { 503 let description = [{ 504 Interchanges the iterators of the operations pointed to by the target handle 505 using the iterator interchange attribute. 506 507 #### Return modes 508 509 This operation ignores non-linalg::Generic ops and drops them in the return. 510 This operation fails if the interchange attribute is invalid. 511 If all the operations referred to by the `target` handle interchange 512 properly, the transform succeeds. 513 If any interchange fails, the transform produces a definite failure. 514 The return handle points to only the subset of successfully produced 515 interchanged operations, which can be empty. 
516 }]; 517 518 let arguments = 519 (ins TransformHandleTypeInterface:$target, 520 ConfinedAttr<DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">, 521 [DenseArrayNonNegative<DenseI64ArrayAttr>]>:$iterator_interchange); 522 let results = (outs TransformHandleTypeInterface:$transformed); 523 524 let assemblyFormat = [{ 525 $target 526 (`iterator_interchange` `=` $iterator_interchange^)? attr-dict 527 `:` custom<SemiFunctionType>(type($target), type($transformed), "false") 528 }]; 529 let hasVerifier = 1; 530 531 let extraClassDeclaration = [{ 532 ::mlir::DiagnosedSilenceableFailure applyToOne( 533 ::mlir::transform::TransformRewriter &rewriter, 534 ::mlir::linalg::GenericOp target, 535 ::mlir::transform::ApplyToEachResultList &results, 536 ::mlir::transform::TransformState &state); 537 }]; 538} 539 540//===----------------------------------------------------------------------===// 541// LowerPackOp 542//===----------------------------------------------------------------------===// 543def LowerPackOp : Op<Transform_Dialect, "structured.lower_pack", [ 544 FunctionalStyleTransformOpTrait, 545 MemoryEffectsOpInterface, 546 TransformEachOpTrait, 547 TransformOpInterface, 548 ReportTrackingListenerFailuresOpTrait]> { 549 let description = [{ 550 Rewrite a tensor.pack into tensor.pad + tensor.expand_shape + linalg.transpose. 551 552 #### Return modes 553 554 This operation ignores non-pack ops and drops them in the return. 555 This operation produces a silenceable failure if the rewrite fails for any 556 reason. 557 If all the operations referred to by the `target` are rewritten, the 558 transform succeeds. 559 Return handles to the newly produced pad, expand_shape and transpose ops. 
560 }]; 561 562 let arguments = (ins Transform_ConcreteOpType<"tensor.pack">:$target, 563 DefaultValuedAttr<BoolAttr, "true">:$lowerPadLikeWithInsertSlice); 564 let results = (outs Transform_ConcreteOpType<"tensor.pad">:$pad_op, 565 Transform_ConcreteOpType<"tensor.expand_shape">:$expand_shape_op, 566 Transform_ConcreteOpType<"linalg.transpose">:$transpose_op); 567 let assemblyFormat = [{ 568 $target attr-dict `:` functional-type(operands, results) 569 }]; 570 571 let extraClassDeclaration = [{ 572 ::mlir::DiagnosedSilenceableFailure applyToOne( 573 ::mlir::transform::TransformRewriter &rewriter, 574 ::mlir::tensor::PackOp target, 575 ::mlir::transform::ApplyToEachResultList &transformResults, 576 ::mlir::transform::TransformState &state); 577 }]; 578} 579 580//===----------------------------------------------------------------------===// 581// LowerUnPackOp 582//===----------------------------------------------------------------------===// 583def LowerUnPackOp : Op<Transform_Dialect, "structured.lower_unpack", [ 584 FunctionalStyleTransformOpTrait, 585 MemoryEffectsOpInterface, 586 TransformEachOpTrait, 587 TransformOpInterface, 588 ReportTrackingListenerFailuresOpTrait]> { 589 let description = [{ 590 Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape + 591 tensor.extract_slice. 592 593 #### Return modes 594 595 This operation ignores non-unpack ops and drops them in the return. 596 This operation produces a silenceable failure if the rewrite fails for any 597 reason. 598 If all the operations referred to by the `target` are rewritten, the 599 transform succeeds. 600 Return handles to the newly produced empty, transpose, collapse_shape and extract_slice ops. 
601 }]; 602 603 let arguments = (ins Transform_ConcreteOpType<"tensor.unpack">:$target, 604 DefaultValuedAttr<BoolAttr, "true">:$lowerUnpadLikeWithExtractSlice); 605 let results = (outs Transform_ConcreteOpType<"tensor.empty">:$empty_op, 606 Transform_ConcreteOpType<"linalg.transpose">:$transpose_op, 607 Transform_ConcreteOpType<"tensor.collapse_shape">:$collapse_shape_op, 608 Transform_ConcreteOpType<"tensor.extract_slice">:$extract_slice_op); 609 let assemblyFormat = [{ 610 $target attr-dict `:` functional-type(operands, results) 611 }]; 612 613 let extraClassDeclaration = [{ 614 ::mlir::DiagnosedSilenceableFailure applyToOne( 615 ::mlir::transform::TransformRewriter &rewriter, 616 ::mlir::tensor::UnPackOp target, 617 ::mlir::transform::ApplyToEachResultList &transformResults, 618 ::mlir::transform::TransformState &state); 619 }]; 620} 621 622//===----------------------------------------------------------------------===// 623// MatchOp 624//===----------------------------------------------------------------------===// 625 626def MatchOp : Op<Transform_Dialect, "structured.match", 627 [MemoryEffectsOpInterface, 628 NavigationTransformOpTrait, 629 DeclareOpInterfaceMethods<TransformOpInterface>]> { 630 let description = [{ 631 Match op with the specified constraints, within the target op. 632 633 The following constraints are supported: 634 - interface: an optional MatchInterfaceEnum specifying an enum 635 representation for an interface to target. 636 - ops: an optional StrArrayAttr specifying the concrete name of an op. 637 Multiple names can be specified. Matched ops must have one of specified 638 names. 639 - attribute: the matched op must have all specified attributes (with their 640 specified values). 641 - filter_result_type: the matched op must return exactly this one type. 642 - filter_operand_types: all the operands of the matched op must be of 643 this type. 
If more than a type is specified, then the length of the list 644 must be equal to the number of operands in the matched op, and the match 645 will succeed only if the operand types match all the types in the list 646 in the order in which they are specified. 647 648 Note: Only ops that satisfy all specified constraints are matched. 649 650 TODO: Extend with regions to allow a limited form of constraints. 651 652 #### Return modes 653 654 This op traverses the ops nested under `target` and returns the handles to 655 all the operations that match the requirements. 656 657 This op fails if the target is not a handle to exactly one operation. 658 Otherwise it succeeds. 659 660 This operation does not consume the target handle and produces new handles: 661 it is a navigation op. 662 }]; 663 664 let arguments = (ins TransformHandleTypeInterface:$target, 665 OptionalAttr<StrArrayAttr>:$ops, 666 OptionalAttr<MatchInterfaceEnum>:$interface, 667 OptionalAttr<DictionaryAttr>:$op_attrs, 668 OptionalAttr<TypeAttr>:$filter_result_type, 669 OptionalAttr<TypeArrayAttr>:$filter_operand_types); 670 // TODO: variadic results when needed. 671 let results = (outs TransformHandleTypeInterface:$results); 672 673 let builders = [ 674 OpBuilder<(ins "Value":$target, "ArrayRef<StringRef>":$opNames)>, 675 OpBuilder<(ins "TypeRange":$resultTypes, "Value":$target, "ArrayRef<StringRef>":$opNames)> 676 ]; 677 678 let assemblyFormat = [{ 679 (`ops` `{` $ops^ `}`)? 680 (`interface` `{` $interface^ `}`)? 681 (`attributes` $op_attrs^)? 682 (`filter_result_type` `=` $filter_result_type^)? 683 (`filter_operand_types` `=` $filter_operand_types^)? 
684 `in` $target attr-dict 685 `:` functional-type($target, results) 686 }]; 687} 688 689//===----------------------------------------------------------------------===// 690// MultiTileSizesOp 691//===----------------------------------------------------------------------===// 692 693def MultiTileSizesOp : Op<Transform_Dialect, "structured.multitile_sizes", 694 [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 695 TransformOpInterface, TransformEachOpTrait, 696 ReportTrackingListenerFailuresOpTrait]> { 697 let description = [{ 698 Emits the IR computing the tile sizes `s1` and `s2` such that: 699 700 - there exists a combination of `n` tiles of size `s1` and `m` tiles of 701 size `s2` that covers the entirety of the iteration space `dimension` of 702 the target structured op; 703 - `s1`, `s2` is less than or equal to `target_size`; 704 - `s1` and `s2` are divisible by `divisor`. 705 706 For example, for a dimension of size 54 with target size 12 and divisor 2, 707 this can emit the IR computing the tile size 10, used for 3 tiles, and 12, 708 used for 2 tiles, totally 10*3 + 12*2 = 54. Note that when the divisor does 709 not divide the original dimension size, it is impossible to compute such 710 tile sizes. An assertion is emitted to guard against this in the dynamic 711 case. 712 713 Expects the target size and the divisor to be strictly positive. Folds the 714 IR as much as possible, normally obtaining constant sizes and numbers of 715 tiles for a statically known dimension. 716 717 This does *not* consume the target handle and produces three handles each 718 pointing to single-result index-typed operations (which may be arithmetic 719 constant operations) defining the two respective tile sizes and the product 720 of the first tile size with the number of tiles of that size (useful for 721 splitting the iteration space). 
722 723 This operation composes with the regular tiling when applied per-dimension: 724 725 ```mlir 726 %sz1, %sz2, %split = structured.multitile_sizes %target 727 { target_size = 10, dimension = 1 } 728 : !transform.any_op, !transform.param<i64>, 729 !transform.param<i64>, !transform.param<i64> 730 %handles = structured.split %target after %split { dimension = 1 } 731 : !transform.any_op, !transform.param<i64> 732 %low, %high = transform.split_handle %handles : (!transform.any_op) 733 -> (!transform.any_op, !transform.any_op) 734 %tiled_low, %loop1 = structured.tile_using_for %low [0, %sz1] 735 : (!transform.any_op, !transform.param<i64>) 736 -> (!transform.any_op, !transform.any_op) 737 %tiled_high, %loop2 = structured.tile_using_for %high [0, %sz2] 738 : (!transform.any_op, !transform.param<i64>) 739 -> (!transform.any_op, !transform.any_op) 740 %common = merge_handles %tiled_low, %tiled_high : !transform.any_op 741 742 %sz3, %sz4, %split = structured.multitile_sizes %target 743 { target_size = 42, dimension = 0 } 744 : !transform.any_op, !transform.any_op, 745 !transform.any_op, !transform.any_op 746 %sz3r, %sz4r, %splitr = replicate num(%common) %sz3, %sz4, %split 747 : !transform.any_op, !transform.any_op, !transform.any_op 748 structured.split %common after %splitr { dimension = 0 } 749 : !transform.any_op, !transform.any_op 750 // ... 
751 ``` 752 }]; 753 754 let arguments = (ins TransformHandleTypeInterface:$target, 755 I64Attr:$dimension, 756 I64Attr:$target_size, 757 DefaultValuedAttr<I64Attr, "1">:$divisor); 758 let results = (outs TransformAnyParamTypeOrAnyHandle:$low_size, 759 TransformAnyParamTypeOrAnyHandle:$high_size, 760 TransformAnyParamTypeOrAnyHandle:$split_point); 761 let hasVerifier = 1; 762 let assemblyFormat = 763 "$target attr-dict `:` custom<MultitileSizesTypes>(" 764 "type($target), type($low_size), type($high_size), type($split_point))"; 765 766 let extraClassDeclaration = [{ 767 ::mlir::DiagnosedSilenceableFailure applyToOne( 768 ::mlir::transform::TransformRewriter &rewriter, 769 ::mlir::linalg::LinalgOp target, 770 ::mlir::transform::ApplyToEachResultList &results, 771 TransformState &state); 772 }]; 773} 774 775//===----------------------------------------------------------------------===// 776// PackOp 777//===----------------------------------------------------------------------===// 778 779def PackOp : Op<Transform_Dialect, "structured.pack", [ 780 DeclareOpInterfaceMethods<TransformOpInterface>, 781 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 782 ReportTrackingListenerFailuresOpTrait]> { 783 let description = [{ 784 Pack a LinalgOp by applying a data tiling transformation on the op and 785 packing the operands according to the `packed_sizes` specification. 786 787 Iterator dimensions are tiled in their canonical order in the op spec. 788 Operands are packed according to the same canonical order of the op iterator 789 dimensions. 790 791 Specifying a packed size of 0 for an iterator removes it from consideration 792 for packing. 793 794 `tensor.pack` (resp. `tensor.unpack`) operations are inserted for the operands 795 (resp. results) that need to be packed (resp. unpacked) according to the 796 `packed_sizes` specification. 
797 798 #### Example 799 800 Consider a `linalg.matmul` with indexing maps: 801 ``` 802 // M N K M K 803 // affine_map<(d0, d1, d2) -> (d0, d2)> 804 // K N 805 // affine_map<(d0, d1, d2) -> (d2, d1)> 806 // M N 807 // affine_map<(d0, d1, d2) -> (d0, d1)> 808 %0 = linalg.matmul ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>) 809 outs( %C: tensor<?x?xf32>) 810 ``` 811 812 Specifying packed_sizes [2, 3, 4] results in tiling the iterator dimensions 813 M, N and K, in this order, in both the op and its operands. 814 ``` 815 // M N K m n k M K m k 816 // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> 817 // K N n k 818 // affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d4, d5)> 819 // M N m n 820 // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> 821 %0 = linalg.generic_representing_some_higher_d_matmul 822 ins(%A, %B: tensor<?x?x2x4xf32>, tensor<?x?x3x4xf32>) 823 outs( %C: tensor<?x?x2x3xf32>) 824 ``` 825 In particular, note that the second operand `B` has shape `KxNxnxk` (and not 826 `KxNxkxn` as one could expect by looking **only** at the operand). 827 828 Other layouts can be obtained unsurprisingly from this canonical 829 transformation by composing the resulting operation with a 830 `transform.structured.pack_transpose` op. 831 This composition allows separating concerns and composes better compared 832 to adding additional permutation attributes to this transform op. 833 834 #### Return modes 835 836 This operation applies to a single Linalg op, otherwise it fails. 837 This operation may produce a definite failure if the packing fails for any 838 reason. 839 840 The returned handle points to the packed LinalgOp. 
841 }]; 842 843 let arguments = (ins TransformHandleTypeInterface:$target, 844 Variadic<TransformHandleTypeInterface>:$packed_sizes, 845 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$static_packed_sizes); 846 let results = (outs TransformHandleTypeInterface:$packed_op); 847 let assemblyFormat = [{ 848 $target 849 `packed_sizes` `=` custom<DynamicIndexList>($packed_sizes, 850 $static_packed_sizes) 851 attr-dict 852 `:` functional-type(operands, results) 853 }]; 854 855 let builders = [ 856 OpBuilder<(ins "Value":$target, 857 "ArrayRef<OpFoldResult>":$mixedPackedSizes)> 858 ]; 859 860 let extraClassDeclaration = [{ 861 ::llvm::SmallVector<::mlir::OpFoldResult> getMixedPackedSizes(); 862 }]; 863} 864 865//===----------------------------------------------------------------------===// 866// PackGreedilyOp 867//===----------------------------------------------------------------------===// 868 869def PackGreedilyOp : Op<Transform_Dialect, "structured.pack_greedily", [ 870 DeclareOpInterfaceMethods<TransformOpInterface>, 871 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 872 ReportTrackingListenerFailuresOpTrait]> { 873 let description = [{ 874 Target a Linalg op and rewrite it into packed LinalgOp form by trying to 875 infer whether a known suboperation is embedded 876 877 Different packing strategies are applied in order, when one applies 878 successfully, the transform returns: 879 1. Matmul packing: Try to infer a matmul operation embedded in the target op. 880 Specifically, this looks for 2 parallel dimensions that participate in 881 an outer-product and 1 reduction dimension. 882 These dimensions are referred as (m, n, k) to match canonical matmul 883 terminology. 884 885 The packed sizes for (m, n, k) are specified by `matmul_packed_sizes` 886 and the optional `matmul_padded_sizes_next_multiple_of`. 887 When an entry `matmul_packed_sizes[i]` is non-0, the corresponding 888 dimension is packed by `matmul_packed_sizes[i]`. 
889 Otherwise, the dimension is merely padded to the next multiple of 890 `matmul_padded_sizes_next_multiple_of[i]`. 891 892 `matmul_padded_sizes_next_multiple_of` is optional and is expected to 893 either be empty or of size `3`, matching the size of `matmul_packed_sizes`. 894 For each individual element of `matmul_packed_sizes` and 895 `matmul_padded_sizes_next_multiple_of`, only one of them is allowed to 896 be non-zero. 897 898 The ordering of the packed dimensions (mm, nn, kk) is specified by the 899 `matmul_inner_dims_order` attribute. 900 901 Packing occurs as follows: 902 1. Find the dimensions to pack according to the strategy. 903 2. The target is converted to linalg.generic form. 904 3. An interchange transform is applied to isolate the dimensions to pack as 905 the most minor indexing dimensions of the linalg.generic. The most minor 906 dimensions are themselves ordered according to `inner_dims_order`. 907 4. An elementwise traversal of `matmul_packed_sizes` and 908 `matmul_padded_sizes_next_multiple_of` is performed and for each 909 dimension `d`, either pack to `matmul_packed_sizes[d]` or pad to the 910 `matmul_padded_sizes_next_multiple_of[d]`. 911 5. Packing/padding is performed by the amounts determined in step 4. and 912 following `inner_dims_order`. 913 914 By normalizing the most minor dimensions to `inner_dims_order`, the transform 915 guarantees that packing immediately generates inner dimensions in a desirable 916 layout. 917 918 Outer dimension layout permutations are not controlled by this transform op 919 at the moment and can be obtained by composing with the pack_transpose 920 transformation. 921 922 #### Return modes 923 924 This operation ignores non-Linalg ops and drops them in the return. 925 It returns the list of packed Linalg ops or the original op when all available 926 packing strategies failed to apply. 927 }]; 928 929 // TODO: Transform_ConcreteOpType<linalg::LinalgOp> needs interface. 
930 let arguments = (ins TransformHandleTypeInterface:$target, 931 Variadic<TransformHandleTypeInterface>:$matmul_packed_sizes, 932 ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">, 933 [DenseArrayCount<3>]>:$static_matmul_packed_sizes, 934 ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">, 935 [Attr< 936 Or<[DenseArrayCount<0>.predicate, 937 DenseArrayCount<3>.predicate]>, 938 "with 0 or 3 elements" 939 >]> 940 :$matmul_padded_sizes_next_multiple_of, 941 ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">, 942 [DenseArrayCount<3>]>:$matmul_inner_dims_order); 943 let results = (outs TransformHandleTypeInterface:$packed_op); 944 945 let builders = [ 946 OpBuilder<(ins "Value":$target, 947 "ArrayRef<OpFoldResult>":$mixedMatmulPackedSizes, 948 "ArrayRef<int64_t>":$matmulPaddededSizesNextMultipleOf, 949 CArg<"ArrayRef<int64_t>", "{}">:$matmulDimsInnerDimsOrder)> 950 ]; 951 952 let assemblyFormat = [{ 953 $target 954 oilist( 955 `matmul_packed_sizes` `=` custom<DynamicIndexList>($matmul_packed_sizes, 956 $static_matmul_packed_sizes) 957 (`matmul_padded_sizes_next_multiple_of` `=` 958 $matmul_padded_sizes_next_multiple_of^)? 959 `matmul_inner_dims_order` `=` $matmul_inner_dims_order 960 ) 961 attr-dict 962 `:` functional-type(operands, results) 963 }]; 964 let hasVerifier = 1; 965 966 let extraClassDeclaration = [{ 967 /// Returns the list of tile sizes, which may be static (Attribute) or 968 /// dynamic (Value). 
969 SmallVector<OpFoldResult> getMixedMatmulPackedSizes(); 970 }]; 971} 972 973//===----------------------------------------------------------------------===// 974// PackTransposeOp 975//===----------------------------------------------------------------------===// 976 977def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [ 978 FunctionalStyleTransformOpTrait, 979 MemoryEffectsOpInterface, 980 DeclareOpInterfaceMethods<TransformOpInterface>, 981 ReportTrackingListenerFailuresOpTrait]> { 982 let description = [{ 983 Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and 984 update the `linalg.generic` op that consumes (resp. produces) the operation. 985 986 This transform allows composing a simple `structured.pack` with additional 987 transpositions to e.g. match the data format required by a specific library 988 call or ISA instruction. 989 990 The transpose spec must specify at least one of `outer_perm` or `inner_perm` 991 attributes, which will act upon the `outer_dims_perm` or `inner_dims_pos` of 992 the specified `tensor.pack` or `tensor.unpack` op. 993 994 If the `target` of this op is a `tensor.pack` then a new `tensor.empty` will 995 be created along with transposed versions of the `tensor.pack` and the 996 consuming `linalg.generic`, which is expected to be the sole consumer. 997 998 If the `target` of this op is a `tensor.unpack` then the whole pack / compute 999 / unpack chain will be transposed and transposed clones of `tensor.pack`, 1000 the consuming `linalg.generic` and the tail `tensor.pack` will be created. 1001 1002 #### Return modes 1003 1004 This operation targets a single `tensor.pack` / `tensor.unpack` op and a 1005 single matching `linalg.generic` that consumes / produces the op. Otherwise, 1006 it produces a silenceableFailure. 1007 1008 This operation may produce a silenceableFailure if the transpose spec is 1009 ill-formed (i.e. 
`outer_perm` or `inner_perm` are not permutations of the 1010 proper rank) or if the transposition of all involved operations fails for any 1011 reason. 1012 1013 This operation returns 3 handles, one to the transformed LinalgOp, one to 1014 the transformed `tensor.pack` and one to the transformed `tensor.unpack`. 1015 The last handle for `tensor.unpack` is empty if `target_pack_or_unpack_op` 1016 was not itself a `tensor.unpack`. 1017 }]; 1018 1019 let arguments = (ins TransformHandleTypeInterface:$target_pack_or_un_pack_op, 1020 TransformHandleTypeInterface:$target_linalg_op, 1021 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_perm, 1022 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$inner_perm); 1023 let results = (outs TransformHandleTypeInterface:$packed_op, 1024 TransformHandleTypeInterface:$pack_op, 1025 TransformHandleTypeInterface:$un_pack_op); 1026 let assemblyFormat = [{ 1027 $target_pack_or_un_pack_op 1028 `with_compute_op` `(` $target_linalg_op `)` 1029 (`outer_perm` `=` $outer_perm^ )? 1030 (`inner_perm` `=` $inner_perm^ )? 1031 attr-dict 1032 `:` functional-type(operands, results) 1033 }]; 1034 1035 let hasVerifier = 1; 1036} 1037 1038//===----------------------------------------------------------------------===// 1039// PadOp 1040//===----------------------------------------------------------------------===// 1041 1042def PadOp : Op<Transform_Dialect, "structured.pad", 1043 [FunctionalStyleTransformOpTrait, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1044 TransformOpInterface, 1045 ReportTrackingListenerFailuresOpTrait]> { 1046 let description = [{ 1047 Pads the operations pointed to by the target handle using the options 1048 provides as operation attributes. The operation returns a handle to the 1049 padded operation and to the padding operation ("tensor.pad"). 1050 1051 To preserve tensor SSA use-def chains, the unpadded result is copied back to 1052 the original destination tensor of the targeted op. 
The op that copies back 1053 the result can be customized with `copy_back_op`: 1054 1055 * "bufferization.materialize_in_destination" (default) 1056 * "linalg.copy" 1057 * "none" (no copy back) 1058 1059 #### Return modes 1060 1061 This operation ignores non-Linalg ops and drops them in the return. 1062 This operation may produce a definite failure if the padding fails for any 1063 reason. 1064 1065 If all the operations referred to by the `target` handle pad 1066 properly, the transform succeeds. Otherwise the transform produces a 1067 silenceable failure. 1068 The return handle points to only the subset of successfully produced 1069 padded operations, which can be empty. 1070 }]; 1071 1072 let arguments = 1073 (ins TransformHandleTypeInterface:$target, 1074 DefaultValuedAttr<ArrayAttr, "{}">:$padding_values, 1075 DefaultValuedAttr<I64ArrayAttr, "{}">:$padding_dimensions, 1076 Variadic<TransformAnyParamTypeOrAnyHandle>:$pad_to_multiple_of, 1077 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">: 1078 $static_pad_to_multiple_of, 1079 DefaultValuedAttr<I64ArrayAttr, "{}">:$nofold_flags, 1080 DefaultValuedAttr< 1081 TypedArrayAttrBase<I64ArrayAttr, "array of arrays of i64">, 1082 "{}">:$transpose_paddings, 1083 DefaultValuedAttr<StrAttr, "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copy_back_op); 1084 let results = (outs TransformHandleTypeInterface:$padded, 1085 TransformHandleTypeInterface:$pad, 1086 TransformHandleTypeInterface:$copy); 1087 1088 let assemblyFormat = [{ 1089 $target 1090 (`pad_to_multiple_of` custom<DynamicIndexList>($pad_to_multiple_of, $static_pad_to_multiple_of)^)? 1091 attr-dict 1092 `:` functional-type(operands, results) 1093 }]; 1094 1095 let hasVerifier = 1; 1096 1097 let builders = [ 1098 // Builder for a transform::PadOp with automatic inference of padding 1099 // value. 
Warning: this will set the value 0 for the inferred elemental 1100 // type without taking the op into account and thus only work for the 1101 // add/mul ring at the moment. 1102 // TODO: support other operations (e.g. min, max etc). 1103 OpBuilder<(ins "Value":$target, 1104 "ArrayRef<int64_t>":$paddingDimensions, 1105 CArg<"ArrayRef<int64_t>", "{}">:$staticPadToMultipleOf, 1106 CArg<"ArrayRef<int64_t>", "{}">:$nofoldFlags, 1107 CArg<"ArrayRef<Attribute>", "{}">:$transposePaddings, 1108 CArg<"StringRef", "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copyBackOp)>, 1109 OpBuilder<(ins "Value":$target, 1110 "ArrayRef<int64_t>":$paddingDimensions, 1111 "ArrayRef<OpFoldResult>":$mixedPadToMultipleOf, 1112 CArg<"ArrayRef<int64_t>", "{}">:$nofoldFlags, 1113 CArg<"ArrayRef<Attribute>", "{}">:$transposePaddings, 1114 CArg<"StringRef", "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copyBackOp)> 1115 ]; 1116 1117 let extraClassDeclaration = [{ 1118 /// copy_back_op attribute value indicating that no copy back is desired. 1119 static constexpr StringRef kCopyOpNone = "none"; 1120 1121 /// Returns a mix of dynamic `pad_to_multiple_of` and static `static_pad_to_multiple_of`. 
1122 SmallVector<OpFoldResult> getMixedPadToMultipleOf(); 1123 1124 ::mlir::DiagnosedSilenceableFailure apply( 1125 ::mlir::transform::TransformRewriter &rewriter, 1126 ::mlir::transform::TransformResults &results, 1127 ::mlir::transform::TransformState &state); 1128 }]; 1129} 1130 1131//===----------------------------------------------------------------------===// 1132// HoistPadOp 1133//===----------------------------------------------------------------------===// 1134 1135def HoistPadBuildPackingLoopNestOp : 1136 Op<Transform_Dialect, 1137 "structured.hoist_pad.build_packing_loop_nest", 1138 [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1139 DeclareOpInterfaceMethods<TransformOpInterface>, 1140 ReportTrackingListenerFailuresOpTrait]> { 1141 let description = [{ 1142 Helper transform used to hoist a tensor.pad target operation. This operation 1143 creates the packing loop nest required by the hoist_pad operation and makes 1144 that functionality available independently. 1145 1146 TODO: In the future, we should consider rewriting as a tensor.pack after 1147 hoisting since this abstraction is now available. 1148 1149 #### Return modes 1150 1151 This operation ignores non-tensor.pad ops and drops them in the result. 1152 If any non-tensor.pad is passed, the transform emits a silenceable failure. 1153 1154 The return handle points to only the subset of successfully created packing 1155 loop nests, which can be empty. 1156 }]; 1157 1158 // Also allow any payload operation for simpler composition. Non-tensor.pad ops 1159 // will be dropped from the results. 1160 let arguments = 1161 (ins TransformHandleTypeInterface:$target, 1162 TransformHandleTypeInterface:$loop, 1163 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$transpose); 1164 let results = (outs TransformHandleTypeInterface:$packing_loop); 1165 1166 let assemblyFormat = [{ 1167 $target 1168 `above` $loop 1169 (`,` `transpose` `by` $transpose^)? 
1170 attr-dict 1171 `:` functional-type(operands, results) 1172 }]; 1173 let hasVerifier = 1; 1174} 1175 1176def HoistPadOp : Op<Transform_Dialect, "structured.hoist_pad", 1177 [FunctionalStyleTransformOpTrait, 1178 MemoryEffectsOpInterface, 1179 TransformOpInterface, 1180 TransformEachOpTrait]> { 1181 let description = [{ 1182 Hoist the tensor.pad target operation by at most the given number of loops. 1183 Optionally apply the transpose attribute to the inner dimensions. 1184 1185 TODO: In the future, we should consider rewriting as a tensor.pack after 1186 hoisting since this abstraction is now available. 1187 TODO: Maybe also return the linalg.generic transpose created at some point. 1188 1189 #### Return modes 1190 1191 This operation ignores non-tensor.pad ops and drops them in the result. 1192 If any non-tensor.pad is passed, the transform emits a silenceable failure. 1193 1194 If all the operations referred to by the `target` handle pad properly, the 1195 transform succeeds. Otherwise the transform produces a silenceable failure. 1196 1197 The return handle points to only the subset of successfully hoisted 1198 tensor.pad operations, which can be empty. 1199 }]; 1200 1201 // Also allow any operation for simpler composition. Non-tensor.pad ops 1202 // will be dropped from the results. 1203 let arguments = 1204 (ins TransformHandleTypeInterface:$target, 1205 I64Attr:$num_loops, 1206 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$transpose); 1207 let results = (outs TransformHandleTypeInterface:$transformed); 1208 1209 let assemblyFormat = [{ 1210 $target 1211 `by` $num_loops `loops` 1212 (`,` `transpose` `by` $transpose^)? 
1213 attr-dict 1214 `:` functional-type(operands, results) 1215 }]; 1216 let hasVerifier = 1; 1217 1218 let extraClassDeclaration = [{ 1219 ::mlir::DiagnosedSilenceableFailure applyToOne( 1220 ::mlir::transform::TransformRewriter &rewriter, 1221 ::mlir::tensor::PadOp, 1222 ::mlir::transform::ApplyToEachResultList &results, 1223 ::mlir::transform::TransformState &state); 1224 }]; 1225} 1226 1227//===----------------------------------------------------------------------===// 1228// PromoteOp 1229//===----------------------------------------------------------------------===// 1230 1231 1232def PromoteOp : Op<Transform_Dialect, "structured.promote", 1233 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 1234 TransformOpInterface, TransformEachOpTrait, 1235 ReportTrackingListenerFailuresOpTrait]> { 1236 let description = [{ 1237 Promotes the specified operands of the target into a separate memory buffer. 1238 1239 At this point, this transform does not allow customizing alloc/dealloc 1240 functions nor the behavior on copy in/out operations. 1241 1242 #### Return modes 1243 1244 This operation applies to a single Linalg op that satisfies the 1245 `promoteSubviewsPrecondition`, otherwise it fails. 1246 1247 If the operations referred to by the `target` handle promote 1248 properly, the transform succeeds. 1249 1250 When successful, the return handle points to the $target operation that 1251 was modified inplace. 
1252 }]; 1253 1254 let arguments = (ins TransformHandleTypeInterface:$target, 1255 DefaultValuedAttr<I64ArrayAttr, "{}">:$operands_to_promote, 1256 DefaultValuedAttr<BoolArrayAttr, "{}">:$use_full_tile_buffers, 1257 UnitAttr:$use_full_tiles_by_default, 1258 UnitAttr:$use_alloca, 1259 OptionalAttr<AnyAttr>:$memory_space, 1260 OptionalAttr<DeviceMappingArrayAttr>:$mapping, 1261 OptionalAttr<I64Attr>:$alignment); 1262 let results = (outs TransformHandleTypeInterface:$transformed); 1263 1264 let assemblyFormat = [{ 1265 $target attr-dict `:` 1266 custom<SemiFunctionType>(type($target), type($transformed), "false") 1267 }]; 1268 1269 let extraClassDeclaration = [{ 1270 ::mlir::DiagnosedSilenceableFailure applyToOne( 1271 ::mlir::transform::TransformRewriter &rewriter, 1272 ::mlir::linalg::LinalgOp target, 1273 ::mlir::transform::ApplyToEachResultList &results, 1274 ::mlir::transform::TransformState &state); 1275 }]; 1276} 1277 1278//===----------------------------------------------------------------------===// 1279// ReplaceOp 1280//===----------------------------------------------------------------------===// 1281 1282def ReplaceOp : Op<Transform_Dialect, "structured.replace", 1283 [IsolatedFromAbove, DeclareOpInterfaceMethods<TransformOpInterface>, 1284 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1285 ReportTrackingListenerFailuresOpTrait] # GraphRegionNoTerminator.traits> { 1286 let description = [{ 1287 Replace all `target` payload ops with the single op that is contained in 1288 this op's region. All targets must have zero arguments and must be isolated 1289 from above. 1290 1291 This op is for debugging/experiments only. 1292 1293 #### Return modes 1294 1295 This operation consumes the `target` handle. 
1296 }]; 1297 1298 let arguments = (ins TransformHandleTypeInterface:$target); 1299 let results = (outs TransformHandleTypeInterface:$replacement); 1300 let regions = (region SizedRegion<1>:$bodyRegion); 1301 let assemblyFormat = [{ 1302 $target attr-dict-with-keyword regions `:` 1303 custom<SemiFunctionType>(type($target), type($replacement), "false") 1304 }]; 1305 let hasVerifier = 1; 1306} 1307 1308//===----------------------------------------------------------------------===// 1309// ScalarizeOp 1310//===----------------------------------------------------------------------===// 1311 1312def ScalarizeOp : Op<Transform_Dialect, "structured.scalarize", 1313 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 1314 TransformOpInterface, TransformEachOpTrait, 1315 ReportTrackingListenerFailuresOpTrait]> { 1316 let description = [{ 1317 Indicates that ops of a specific kind in the given function should be 1318 scalarized (i.e. their dynamic dimensions tiled by 1). 1319 1320 #### Return modes: 1321 1322 This operation ignores non-Linalg ops and drops them in the return. 1323 This operation produces definite failure if the scalarization fails for any 1324 reason. 1325 If all the operations referred to by the `target` handle scalarize 1326 properly, the transform succeeds. Otherwise the transform produces a 1327 silenceable failure. 1328 1329 The return handle points to only the subset of successfully produced 1330 tiled-by-1 operations, which can be empty. 1331 1332 This operation does not return handles to the tiled loop. 1333 We make this design choice because it is hard to know ahead of time the 1334 number of loops that will be produced (it depends on the number of dynamic 1335 dimensions after multiple transformations have been applied). 1336 Loops can always be recovered by navigating from the tiled operations if 1337 needed. 
1338 }]; 1339 1340 let arguments = (ins TransformHandleTypeInterface:$target); 1341 let results = (outs TransformHandleTypeInterface:$result); 1342 1343 let assemblyFormat = [{ 1344 $target attr-dict `:` 1345 custom<SemiFunctionType>(type($target), type($result), "false") 1346 }]; 1347 1348 let extraClassDeclaration = [{ 1349 ::mlir::DiagnosedSilenceableFailure applyToOne( 1350 ::mlir::transform::TransformRewriter &rewriter, 1351 ::mlir::linalg::LinalgOp target, 1352 ::mlir::transform::ApplyToEachResultList &results, 1353 ::mlir::transform::TransformState &state); 1354 }]; 1355} 1356 1357//===----------------------------------------------------------------------===// 1358// ConvertToLoopsOp 1359//===----------------------------------------------------------------------===// 1360 1361def ConvertToLoopsOp : Op<Transform_Dialect, "structured.convert_to_loops", 1362 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 1363 DeclareOpInterfaceMethods<TransformOpInterface>, 1364 ReportTrackingListenerFailuresOpTrait]> { 1365 let description = [{ 1366 For operations that implement the `TilingInterface`, and implement 1367 the `generateScalarImplementation` method, lowers the operation to 1368 loops. The return handle points to all generated loops. 1369 Fails if the payload ops cannot be lowered to loops. 
1370 }]; 1371 1372 let arguments = (ins TransformHandleTypeInterface:$target); 1373 let results = (outs TransformHandleTypeInterface:$result); 1374 1375 let assemblyFormat = [{ 1376 $target attr-dict `:` functional-type(operands, results) 1377 }]; 1378} 1379 1380//===----------------------------------------------------------------------===// 1381// DecomposeInterfaceOp 1382//===----------------------------------------------------------------------===// 1383 1384def DecomposeInterfaceOp : Op<Transform_Dialect, "structured.decompose_interface", 1385 [FunctionalStyleTransformOpTrait, 1386 MemoryEffectsOpInterface, 1387 TransformOpInterface, 1388 TransformEachOpTrait, 1389 ReportTrackingListenerFailuresOpTrait]> { 1390 let description = [{ 1391 TODO 1392 }]; 1393 1394 let arguments = (ins TransformHandleTypeInterface:$target); 1395 let results = (outs TransformHandleTypeInterface:$transformed); 1396 let assemblyFormat = 1397 "$target attr-dict `:` functional-type(operands, results)"; 1398 1399 let extraClassDeclaration = [{ 1400 ::mlir::DiagnosedSilenceableFailure applyToOne( 1401 ::mlir::transform::TransformRewriter &rewriter, 1402 ::mlir::Operation *target, 1403 ::mlir::transform::ApplyToEachResultList &results, 1404 ::mlir::transform::TransformState &state); 1405 }]; 1406} 1407//===----------------------------------------------------------------------===// 1408// RewriteInDestinationPassingStyleOp. 1409//===----------------------------------------------------------------------===// 1410 1411def RewriteInDestinationPassingStyleOp : Op< 1412 Transform_Dialect, "structured.rewrite_in_destination_passing_style", 1413 [FunctionalStyleTransformOpTrait, 1414 MemoryEffectsOpInterface, 1415 TransformOpInterface, 1416 TransformEachOpTrait, 1417 ReportTrackingListenerFailuresOpTrait]> { 1418 let description = [{ 1419 Rewrite a supported tensor operation that is not in destination-passing style 1420 into a form that is in destination-passing style. 
1421 Currently supported operations are: 1422 - tensor.pad 1423 - tensor.generate 1424 - tensor.from_elements 1425 This dichotomy hints at a future interface, for now the implementation just 1426 switches between different implementation. 1427 1428 #### Return modes 1429 1430 This operation ignores non-unsupported ops and drops them from the return. 1431 If all the operations referred to by the `target` handle generalize 1432 properly, the transform succeeds. Otherwise the transform produces a 1433 silenceable failure. 1434 The return handle points to a subset of successfully produced operations: 1435 - `tensor.pad` case, the returned handle points to the tensor.insert_slice. 1436 - `tensor.generate` case, the returned handle points to the linalg.generic. 1437 - `tensor.from_elements` case, the returned handle points to the last 1438 `tensor.insert`. 1439 }]; 1440 1441 let arguments = (ins TransformHandleTypeInterface:$target); 1442 let results = (outs TransformHandleTypeInterface:$transformed); 1443 let assemblyFormat = [{ 1444 $target attr-dict 1445 `:` functional-type($target, results) 1446 }]; 1447 1448 let extraClassDeclaration = [{ 1449 ::mlir::DiagnosedSilenceableFailure applyToOne( 1450 ::mlir::transform::TransformRewriter &rewriter, 1451 ::mlir::Operation *target, 1452 ::mlir::transform::ApplyToEachResultList &results, 1453 ::mlir::transform::TransformState &state); 1454 }]; 1455} 1456 1457//===----------------------------------------------------------------------===// 1458// SplitOp 1459//===----------------------------------------------------------------------===// 1460 1461def SplitOp : Op<Transform_Dialect, "structured.split", 1462 [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1463 DeclareOpInterfaceMethods<TransformOpInterface>, 1464 ReportTrackingListenerFailuresOpTrait]> { 1465 let description = [{ 1466 Splits the given `target` op into two or more complementary 1467 parts, which combined cover the entire iteration domain of the original op. 
1468 The split is performed along the iteration space dimension provided as 1469 chunk size attribute specifying the size of the lower part; the remaining 1470 range in the iteration space is assigned as the upper part. In case of 1471 dimension overflow, the transformation fails. The split is performed at the 1472 dimension iterator value specified as either the static chunk size 1473 attribute when it is known at transform IR construction time or 1474 as the handle to an operation producing a single index-typed value 1475 when it is computed by payload IR. In the latter case, the chunk size 1476 point must be set to `ShapedType::kDynamic` and the dynamic size handle 1477 must point to as many value-producing operations as there are structured 1478 operations pointed to by the target handle. 1479 1480 The operation consumes the target handle, but preserves the chunk size 1481 handle if provided. Without the `multiway` attribute, it produces a 1482 new handle that is a list of the two parts of the structured op after 1483 splitting, whose lower index part corresponds to the part with lower 1484 iteration space indices. 1485 1486 Multiway split mode is enabled by specifying the `multiway` attribute. 1487 In this mode a single `target` op is split into multiple parts covering 1488 the iteration space of the specified dimension. `static_chunk_sizes` and 1489 `dynamic_chunk_sizes` in this case is a list of chunk sizes that the given 1490 dimension should be split into. With `multiway` it also produces a handle; 1491 the result handle is a list of the multiple parts of the structured op 1492 after splitting, where the target dimensions for each linalg op in the 1493 list corresponds to the chunk sizes specified in the input split list. 1494 If the chunk sizes do not cover the entire iteration space, the leftover 1495 chunk is the last payload in the result handle. 
1496 1497 As the result handle is most of the time a list, a `transform.split_handle` 1498 is needed to access individual handles. 1499 }]; 1500 1501 let arguments = (ins TransformHandleTypeInterface:$target, 1502 I64Attr:$dimension, 1503 Optional<TransformAnyParamTypeOrAnyHandle>:$dynamic_chunk_sizes, 1504 I64Attr:$static_chunk_sizes, 1505 UnitAttr:$multiway); 1506 let results = (outs TransformHandleTypeInterface:$split_list); 1507 let hasCustomAssemblyFormat = 1; 1508 let hasVerifier = 1; 1509} 1510 1511//===----------------------------------------------------------------------===// 1512// SplitReductionOp 1513//===----------------------------------------------------------------------===// 1514 1515def SplitReductionOp : Op<Transform_Dialect, "structured.split_reduction", 1516 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 1517 TransformEachOpTrait, TransformOpInterface, 1518 ReportTrackingListenerFailuresOpTrait]> { 1519 let description = [{ 1520 Indicates that the given `target` op should be transformed with the 1521 `splitReduction` transformation and split factor provided as attribute. 1522 1523 The `splitReduction` transformation splits the first single linalg op 1524 reduction into a parallel and reduction dimension. 1525 A new `linalg.generic` op is created to perform the rest of the reduction. 1526 1527 The transformation supports different configuration attributes: 1528 - split_factor: the factor by which to split (i.e. the size of the 1529 remaining reduction after splitting). 1530 - insert_split_dimension: the dimension in the temporary tensor into 1531 which the new parallel dimension is inserted. 1532 - inner_parallel: specifies whether the parallel dimension is before or 1533 after the reduction dimension in the splitting op. 
1534 - use_scaling_algorithm: whether to use a scaling based formulation that 1535 does not create an ExpandShapeOp (default: do not use scaling) 1536 - use_alloc: whether to use an alloc op to allocate the temporary 1537 tensor (default: do not use alloc op) 1538 1539 #### Return modes 1540 1541 This operation ignores non-Linalg ops and drops them in the return. 1542 This operation produces a definite failure if the splitting fails for any 1543 reason. 1544 1545 If all the operations referred to by the `target` handle split 1546 properly, the transform succeeds. Otherwise the transform produces a 1547 silenceable failure. The 4 returned handles point to only the subset of 1548 successfully produced computational operations, which can all be empty. 1549 These 4 returned handles point to: 1550 - the init op (or tensor_alloc op if use_alloc = true), 1551 - the fill op used to initialize the neutral element, 1552 - the split op and 1553 - the result-combining op. 1554 1555 #### Example (default: `use_scaling_algorithm = false, use_alloc = false`): 1556 1557 ``` 1558 %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, 1559 affine_map<(d0) -> ()>], 1560 iterator_types = ["reduction"]} 1561 ins(%in : tensor<32xf32>) 1562 outs(%out : tensor<f32>) { 1563 ^bb0(%arg1: f32, %arg2: f32): 1564 %y = arith.addf %arg1, %arg2 : f32 1565 linalg.yield %y : f32 1566 } -> tensor<f32> 1567 ``` 1568 1569 is split into: 1570 1571 ``` 1572 %cst = arith.constant 0.000000e+00 : f32 1573 %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32> 1574 %1 = tensor.empty() : tensor<4xf32> 1575 %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> 1576 %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, 1577 affine_map<(d0, d1) -> (d0)>], 1578 iterator_types = ["parallel", "reduction"]} 1579 ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) { 1580 ^bb0(%arg3: f32, %arg4: f32): 1581 %5 = arith.addf %arg3, %arg4 : f32 1582 
linalg.yield %5 : f32 1583 } -> tensor<4xf32> 1584 %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, 1585 affine_map<(d0) -> ()>], 1586 iterator_types = ["reduction"]} 1587 ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) { 1588 ^bb0(%arg3: f32, %arg4: f32): 1589 %5 = arith.addf %arg3, %arg4 : f32 1590 linalg.yield %5 : f32 1591 } -> tensor<f32> 1592 ``` 1593 1594 #### Example (`use_scaling_algorithm = true, use_alloc = true`): 1595 1596 Instead of introducing an ExpandShapeOp, this scaling-based implementation 1597 rewrites a reduction dimension `k` into `k * split_factor + kk`. 1598 The dimension `kk` is added as an extra parallel dimension to the 1599 intermediate output tensor at position `insert_split_dimension`. 1600 1601 Consider a minimal example where `k` is reduced: 1602 O(i, j) += I(i, j, k) 1603 Assume i=3, j=5, k=128, split_factor=16 and insert_split_dimension=0. 1604 The compute is rewritten as: 1605 a. O_i(kk, i, j) += I(i, j, 16 * k + kk) 1606 b. O(i, j) += O_i(kk, i, j) 1607 The intermediate tensor O_i is of shape (128/16)x3x5 == 8x3x5. 
1608 1609 #### Example: 1610 1611 ``` 1612 %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>) 1613 outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> 1614 ``` 1615 1616 Is transformed to: 1617 1618 ``` 1619 #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2 * 4 + d3)> 1620 #map1 = affine_map<(d0, d1, d2, d3) -> (d2 * 4 + d3, d1)> 1621 #map2 = affine_map<(d0, d1, d2, d3) -> (d2, d3)> 1622 #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> 1623 #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> 1624 #map5 = affine_map<(d0, d1, d2) -> (d0, d1)> 1625 %0 = tensor.empty() : tensor<16x32x64xf32> 1626 %cst = arith.constant 0.000000e+00 : f32 1627 %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) -> 1628 tensor<16x32x64xf32> 1629 %2 = tensor.empty() : tensor<64x4xi1> 1630 1631 %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3], 1632 iterator_types = ["parallel", "parallel", "parallel", "reduction"]} 1633 ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>) 1634 outs(%1 : tensor<16x32x64xf32>) { 1635 ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32): 1636 %5 = arith.mulf %arg3, %arg4 : f32 1637 %6 = arith.addf %arg6, %5 : f32 1638 linalg.yield %6 : f32 1639 } -> tensor<16x32x64xf32> 1640 1641 %4 = linalg.generic {indexing_maps = [#map4, #map5], 1642 iterator_types = ["parallel", "parallel", "reduction"]} 1643 ins(%3 : tensor<16x32x64xf32>) 1644 outs(%C : tensor<16x32xf32>) { 1645 ^bb0(%arg3: f32, %arg4: f32): 1646 %5 = arith.addf %arg3, %arg4 : f32 1647 linalg.yield %5 : f32 1648 } -> tensor<16x32xf32> 1649 1650 return %4 : tensor<16x32xf32> 1651 ``` 1652 }]; 1653 1654 let arguments = (ins TransformHandleTypeInterface:$target, 1655 DefaultValuedAttr<I64Attr, "{}">:$split_factor, 1656 DefaultValuedAttr<I64Attr, "{}">:$insert_split_dimension, 1657 UnitAttr:$inner_parallel, 1658 UnitAttr:$use_scaling_algorithm, 1659 UnitAttr:$use_alloc); 1660 let results = (outs TransformHandleTypeInterface:$init_or_alloc_op, 
1661 TransformHandleTypeInterface:$fill_op, 1662 TransformHandleTypeInterface:$split_linalg_op, 1663 TransformHandleTypeInterface:$combining_linalg_op); 1664 1665 let assemblyFormat = 1666 "$target attr-dict `:`" 1667 "functional-type(operands, results)"; 1668 1669 let builders = [ 1670 OpBuilder<(ins "Value":$target, 1671 "int64_t":$splitFactor, 1672 "int64_t":$insertSplitDimension, 1673 CArg<"bool", "false">:$innerParallel, 1674 CArg<"bool", "false">:$useScalingAlgorithm, 1675 CArg<"bool", "false">:$useAlloc)> 1676 ]; 1677 1678 let extraClassDeclaration = [{ 1679 ::mlir::DiagnosedSilenceableFailure applyToOne( 1680 ::mlir::transform::TransformRewriter &rewriter, 1681 ::mlir::linalg::LinalgOp target, 1682 ::mlir::transform::ApplyToEachResultList &results, 1683 ::mlir::transform::TransformState &state); 1684 }]; 1685} 1686 1687//===----------------------------------------------------------------------===// 1688// TileReductionUsingForOp 1689//===----------------------------------------------------------------------===// 1690 1691def TileReductionUsingForOp : Op<Transform_Dialect, "structured.tile_reduction_using_for", 1692 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 1693 TransformEachOpTrait, TransformOpInterface, 1694 ReportTrackingListenerFailuresOpTrait]> { 1695 let description = [{ 1696 Indicates that the given `target` op should be transformed with the 1697 `tileReduction` transformation with the tile size provided as attribute. 1698 1699 This transformation tiles the `target` along the reduction dimensions. It 1700 creates a tensor initialized with the identity value. Then it creates nested 1701 loops with a parallel version of `target` op inside. The parallel op 1702 dimensions are less or equal to the tile size passed by user. 1703 After the loop a merge operation is created to do a final reduction with the 1704 partial reductions. 1705 The initial tensor always uses the tile size dimension. 
This may overallocate 1706 if the tile size is greater than the reduction dimension. 1707 1708 #### Return modes 1709 1710 Returns 4 handles associated with (in order): 1711 - the fill op used to initialize the neutral element, 1712 - the parallel tiled op and 1713 - the result-combining op, 1714 - the parent `for` op. 1715 1716 #### Example: 1717 1718 ``` 1719 %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, 1720 affine_map<(d0, d1) -> (d0)>], 1721 iterator_types = ["parallel", "reduction"]} 1722 ins(%arg0 : tensor<?x?xf32>) 1723 outs(%out : tensor<?xf32>) { 1724 ^bb0(%arg7: f32, %arg9: f32): 1725 %1 = arith.addf %arg7, %arg9 : f32 1726 linalg.yield %1 : f32 1727 } -> tensor<?xf32> 1728 return %red : tensor<?xf32> 1729 ``` 1730 1731 is transformed into: 1732 1733 ``` 1734 %0 = tensor.empty(%dim_1) : tensor<?x5xf32> 1735 %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32> 1736 %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor<?x5xf32>) { 1737 %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor<?x5xf32> to tensor<?x5xf32> 1738 %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor<?x?xf32> to tensor<?x5xf32> 1739 %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, 1740 affine_map<(d0, d1) -> (d0, d1)>], 1741 iterator_types = ["parallel", "parallel"]} 1742 ins(%extracted_slice_2 : tensor<?x5xf32>) 1743 outs(%extracted_slice : tensor<?x5xf32>) { 1744 ^bb0(%in: f32, %out: f32): 1745 %5 = arith.addf %in, %out : f32 1746 linalg.yield %5 : f32 1747 } -> tensor<?x5xf32> 1748 %dim_3 = tensor.dim %1, %c0 : tensor<?x5xf32> 1749 %inserted_slice = tensor.insert_slice %4 into %arg3[0, 0] [%dim_3, 5] [1, 1] : tensor<?x5xf32> into tensor<?x5xf32> 1750 scf.yield %inserted_slice : tensor<?x5xf32> 1751 } 1752 %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, 1753 affine_map<(d0, d1) -> (d0)>], 1754 iterator_types 
= ["parallel", "reduction"]} 1755 ins(%2 : tensor<?x5xf32>) 1756 outs(%arg1 : tensor<?xf32>) { 1757 ^bb0(%in: f32, %out: f32): 1758 %4 = arith.addf %in, %out : f32 1759 linalg.yield %4 : f32 1760 } -> tensor<?xf32> 1761 ``` 1762 }]; 1763 1764 // TODO: support mixed static-dynamic (see TileUsingForallOp). 1765 let arguments = (ins TransformHandleTypeInterface:$target, 1766 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$tile_sizes); 1767 let results = (outs Variadic<TransformHandleTypeInterface>:$fill_op, 1768 TransformHandleTypeInterface:$split_op, 1769 TransformHandleTypeInterface:$combining_op, 1770 TransformHandleTypeInterface:$for_op); 1771 1772 let builders = [ 1773 OpBuilder<(ins "Value":$target, 1774 "ArrayRef<int64_t>":$staticTileSizes)> 1775 ]; 1776 1777 let assemblyFormat = [{ 1778 $target 1779 `by` `tile_sizes` `=` $tile_sizes 1780 attr-dict 1781 `:` functional-type(operands, results) 1782 }]; 1783 1784 let extraClassDeclaration = [{ 1785 ::mlir::DiagnosedSilenceableFailure applyToOne( 1786 ::mlir::transform::TransformRewriter &rewriter, 1787 Operation *target, 1788 ::mlir::transform::ApplyToEachResultList &results, 1789 ::mlir::transform::TransformState &state); 1790 }]; 1791} 1792 1793//===----------------------------------------------------------------------===// 1794// TileReductionUsingForallOp 1795//===----------------------------------------------------------------------===// 1796 1797def TileReductionUsingForallOp : 1798 Op<Transform_Dialect, "structured.tile_reduction_using_forall", 1799 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 1800 TransformEachOpTrait, TransformOpInterface, 1801 ReportTrackingListenerFailuresOpTrait]> { 1802 let description = [{ 1803 Tile a PartialReductionOpInterface op to a tiled `scf.forall` doing 1804 partial reduction. 1805 1806 This transformation tiles the `target` along the reduction dimensions. It 1807 creates a tensor initialized with the identity value. 
Then it creates an 1808 `scf.forall` loop with the number of threads given by `num_threads`. 1809 The op is tiled with a tile size equal to `floordiv(size, num_threads)`. 1810 All the partial reduction values are inserted in parallel to create a new 1811 tensor. After the loop, a merge operation is created to do a final reduction 1812 with the partial reductions tensor. 1813 If an extra `tile_sizes` parameter is passed, the tiles are cyclically 1814 distributed on the threads of the `scf.forall` loop.
ins(%extracted_slice_2 : tensor<?x?xf32>) outs(%extracted_slice_3 : tensor<?xf32>) { 1852 ^bb0(%in: f32, %out: f32): 1853 %9 = arith.addf %in, %out : f32 1854 linalg.yield %9 : f32 1855 } -> tensor<?xf32> 1856 scf.forall.in_parallel { 1857 tensor.parallel_insert_slice %7 into %arg3[0, %arg2] [%dim, 1] [1, 1] : tensor<?xf32> into tensor<?x5xf32> 1858 } 1859 } {mapping = []} 1860 %3 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor<?x5xf32>) outs(%arg1 : tensor<?xf32>) { 1861 ^bb0(%in: f32, %out: f32): 1862 %4 = arith.addf %in, %out : f32 1863 linalg.yield %4 : f32 1864 } -> tensor<?xf32> 1865 ``` 1866 }]; 1867 1868 // TODO: support mixed static-dynamic (see TileUsingForallOp). 1869 let arguments = (ins TransformHandleTypeInterface:$target, 1870 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$num_threads, 1871 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$tile_sizes, 1872 OptionalAttr<DeviceMappingArrayAttr>:$mapping); 1873 let results = (outs Variadic<TransformHandleTypeInterface>:$fill_op, 1874 TransformHandleTypeInterface:$split_linalg_op, 1875 TransformHandleTypeInterface:$combining_linalg_op, 1876 TransformHandleTypeInterface:$forall_op); 1877 1878 let builders = [ 1879 OpBuilder<(ins "Value":$target, 1880 "ArrayRef<int64_t>":$staticNumThreads, 1881 "ArrayRef<int64_t>":$staticTileSizes, 1882 CArg<"ArrayAttr", "{}">:$mapping)> 1883 ]; 1884 1885 let assemblyFormat = [{ 1886 $target 1887 `by` 1888 (`num_threads` `=` $num_threads^)? 1889 (`,` `tile_sizes` `=` $tile_sizes^)? 1890 (`,` `mapping` `=` $mapping^)? 
1891 attr-dict 1892 `:` functional-type(operands, results) 1893 }]; 1894 1895 let extraClassDeclaration = [{ 1896 ::mlir::DiagnosedSilenceableFailure applyToOne( 1897 ::mlir::transform::TransformRewriter &rewriter, 1898 ::mlir::linalg::LinalgOp target, 1899 ::mlir::transform::ApplyToEachResultList &results, 1900 ::mlir::transform::TransformState &state); 1901 }]; 1902 1903} 1904 1905//===----------------------------------------------------------------------===// 1906// ContinuousTileSizesOp 1907//===----------------------------------------------------------------------===// 1908 1909def ContinuousTileSizesOp : Op<Transform_Dialect, "structured.continuous_tile_sizes", 1910 [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1911 DeclareOpInterfaceMethods<TransformOpInterface>, 1912 ReportTrackingListenerFailuresOpTrait]> { 1913 let description = [{ 1914 This transform emits the IR computing the list of (1) exponentially 1915 diminishing tile sizes that are powers of 2; and (2) the corresponding 1916 chunk-sizes the target op should be split into along the given dimension. 1917 1918 For example, for `target_size` 9, and `dimension` 0 for the following 1919 linalg op as target 1920 1921 ``` 1922 %0 = linalg.matmul ins(%arg0, %arg1: tensor<25x34xf32>, tensor<34x25xf32>) 1923 outs(%arg2: tensor<25x25xf32>) 1924 ``` 1925 1926 the first result `tile_sizes` will be a list of diminishing tile sizes 1927 9, 4, 2, 1; and the second result will be a list of chunk sizes 1928 18, 4, 2, 1 that the corresponding dimension should be split into. 1929 1930 After the target op has been split along the given dimension (for example 1931 using multiway split), each chunk can be tiled with the corresponding tile 1932 size in the `tile_sizes` list generated as a result of this op. 1933 1934 Specifying the output type as !transform.param<i64> will cause `tile_sizes` 1935 and `chunk_sizes` to be computed statically and not dynamically. 
1936 }]; 1937 1938 let arguments = (ins TransformHandleTypeInterface:$target, 1939 ConfinedAttr<I64Attr, [IntNonNegative]>:$dimension, 1940 ConfinedAttr<I64Attr, [IntNonNegative]>:$target_size); 1941 let results = (outs TransformAnyParamTypeOrAnyHandle:$tile_sizes, 1942 TransformAnyParamTypeOrAnyHandle:$chunk_sizes); 1943 let hasVerifier = 1; 1944 let assemblyFormat = 1945 "$target attr-dict `:` custom<ContinuousTileSizeTypes>(" 1946 "type($target), type($tile_sizes), type($chunk_sizes))"; 1947 1948} 1949 1950//===----------------------------------------------------------------------===// 1951// TileUsingForOp 1952//===----------------------------------------------------------------------===// 1953 1954def TileUsingForOp : Op<Transform_Dialect, "structured.tile_using_for", 1955 [DeclareOpInterfaceMethods<TransformOpInterface>, 1956 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1957 ReportTrackingListenerFailuresOpTrait]> { 1958 let description = [{ 1959 Indicates that the given `target` op should be tiled with the given sizes. 1960 This transform generates a loop nest with a smaller ("tiled") target 1961 operation in its body. Currently limited to LinalgOps. 1962 1963 Tile sizes may be known at transformation time, in which case they are 1964 expected to be provided in the `static_size` attribute, or not, in which 1965 case the tile value must be computed by the payload IR and the handle to the 1966 operation computing it must be provided through `dynamic_sizes`. When the 1967 sizes are not known statically, the corresponding entry in the 1968 `static_sizes` attribute must be set to `ShapedType::kDynamic`. Only 1969 the dynamic sizes must be provided in `dynamic_sizes`, i.e., there should 1970 be as many handles as `ShapedType::kDynamic` values in the 1971 `static_sizes` attribute. A static size of `0` indicates that the dimension 1972 should not be tiled. No loop will be generated for such dimensions. 
If all 1973 tile sizes are `0`, this transform is effectively a no-op. 1974 1975 This op returns handles to the tiled op (in the generated loop nest) and the 1976 generated loops. The number of loops is the number of tile sizes that are 1977 statically known to be non-zero. 1978 1979 #### Return modes 1980 1981 On success, the resulting handles are associated with co-indexed lists of 1982 tiled operations and loops around them. 1983 1984 This operation only supports Linalg ops and produces a silenceable failure 1985 if the input contains any non-Linalg ops. The ops preceding it in the list 1986 associated with the `target` handle will have been tiled. 1987 1988 This operation produces a silenceable failure if the `dynamic_sizes` handles 1989 are associated with lists of payload operations of a size different than 1990 that of the list associated with the `target` handle. 1991 1992 If the internal implementation of tiling for any of the operations fails, 1993 produces a definite failure. 
1994 }]; 1995 1996 let arguments = (ins TransformHandleTypeInterface:$target, 1997 Variadic<TransformAnyParamTypeOrAnyHandle>:$dynamic_sizes, 1998 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sizes, 1999 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$interchange, 2000 DefaultValuedOptionalAttr<DenseBoolArrayAttr, "{}">:$scalable_sizes); 2001 let results = (outs TransformHandleTypeInterface:$tiled_linalg_op, 2002 Variadic<TransformHandleTypeInterface>:$loops); 2003 let builders = [ 2004 OpBuilder<(ins "TypeRange":$loopTypes, 2005 "Value":$target, 2006 "ArrayRef<int64_t>":$staticTileSizes, 2007 CArg<"ArrayRef<int64_t>", "{}">:$interchange, 2008 CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">: 2009 $scalableSizes)>, 2010 OpBuilder<(ins "TypeRange":$loopTypes, 2011 "Value":$target, 2012 "ArrayRef<OpFoldResult>":$mixedTileSizes, 2013 CArg<"ArrayRef<int64_t>", "{}">:$interchange, 2014 CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">: 2015 $scalableSizes)>, 2016 OpBuilder<(ins "Value":$target, 2017 "ArrayRef<int64_t>":$staticTileSizes, 2018 CArg<"ArrayRef<int64_t>", "{}">:$interchange, 2019 CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">: 2020 $scalableSizes)>, 2021 OpBuilder<(ins "Value":$target, 2022 "ArrayRef<OpFoldResult>":$mixedTileSizes, 2023 CArg<"ArrayRef<int64_t>", "{}">:$interchange, 2024 CArg<"std::optional<ArrayRef<bool>>", "std::nullopt">: 2025 $scalableSizes)>, 2026 ]; 2027 2028 let assemblyFormat = [{ 2029 $target 2030 `tile_sizes` custom<DynamicIndexList>( 2031 $dynamic_sizes, 2032 $static_sizes, 2033 $scalable_sizes) 2034 (`interchange` `=` $interchange^)? 2035 attr-dict 2036 `:` functional-type(operands, results) 2037 }]; 2038 2039 let hasVerifier = 1; 2040 2041 let extraClassDeclaration = [{ 2042 /// Returns the list of tile sizes, which may be static (Attribute) or 2043 /// dynamic (Value). 
2044 SmallVector<OpFoldResult> getMixedSizes(); 2045 }]; 2046} 2047 2048//===----------------------------------------------------------------------===// 2049// TileUsingForallOp 2050//===----------------------------------------------------------------------===// 2051 2052def TileUsingForallOp : 2053 Op<Transform_Dialect, "structured.tile_using_forall", 2054 [AttrSizedOperandSegments, 2055 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 2056 TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> { 2057 let description = [{ 2058 Tile a TilingInterface op to a tiled `scf.forall`. 2059 2060 Tiling is applied by either specifying `num_threads` or `tile_size`. If 2061 `num_threads` is specified, then the tile size for each dimension `i` is 2062 calculated dynamically via `ceilDiv(dimSize[i], num_threads[i])`. 2063 `num_threads` and `tile_size` can be either static index attributes or 2064 operation handles (or a mix thereof). Operation handles must be mapped to 2065 exactly one op that has exactly one result of index type. 2066 2067 Static zero tile sizes indicate that the dimension is not tiled and can be 2068 thought of as tiling by the full size of data. 2069 2070 It is the user's responsibility to ensure that `num_threads/tile_sizes` is 2071 a valid tiling specification (i.e. that only tiles parallel dimensions, 2072 e.g. in the Linalg case). If the dimension is not parallelizable, a warning 2073 is issued to notify the user that the generated code is not safe to 2074 parallelize. 2075 2076 If non-empty, the `mapping` is added as an attribute to the 2077 resulting `scf.forall`. 2078 2079 Note: `tile_sizes` and `num_threads` are variadic. Each tile size/number of 2080 threads can be an index attribute or a transform handle that is mapped to 2081 exactly one payload op with exactly one index result. 2082 2083 #### Return modes 2084 2085 This operation ignores ops that do not implement the TilingInterface and 2086 drops them in the return. 
2087 2088 If all the operations referred to by the `target` handle tile 2089 successfully, the transform succeeds. 2090 Otherwise the transform produces a silenceable failure. 2091 2092 The two returned handles point to only the subset of successfully produced 2093 tiled operations, which can all be empty. 2094 2095 These two returned handles point to: 2096 - the tiled op that implements TilingInterface, 2097 - the new scf.forall op. 2098 2099 #### Example using `num_threads` 2100 2101 ``` 2102 %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 2103 : (!transform.any_op) -> !transform.any_op 2104 %3:2 = transform.structured.tile_using_forall %0 num_threads [10, 20] 2105 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) 2106 ``` 2107 2108 #### Example using `tile_sizes` 2109 2110 ``` 2111 %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 2112 : (!transform.any_op) -> !transform.any_op 2113 %sz = transform.structured.match ... 2114 %3:2 = transform.structured.tile_using_forall %0 tile_sizes [0, %sz, 20] 2115 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) 2116 ``` 2117 }]; 2118 2119 let arguments = (ins TransformHandleTypeInterface:$target, 2120 Variadic<TransformAnyParamTypeOrAnyHandle>:$num_threads, 2121 Variadic<TransformAnyParamTypeOrAnyHandle>:$tile_sizes, 2122 Optional<TransformAnyParamTypeOrAnyHandle>:$packed_num_threads, 2123 Optional<TransformAnyParamTypeOrAnyHandle>:$packed_tile_sizes, 2124 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_num_threads, 2125 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_tile_sizes, 2126 OptionalAttr<DeviceMappingArrayAttr>:$mapping); 2127 let results = (outs TransformHandleTypeInterface:$tiled_op, 2128 TransformHandleTypeInterface:$forall_op); 2129 2130 let builders = [ 2131 OpBuilder<(ins "Value":$target, 2132 "ArrayRef<int64_t>":$staticTileSizes, 2133 CArg<"::mlir::transform::TileSizesSpec", 2134 
"::mlir::transform::TileSizesSpec()">, 2135 CArg<"ArrayAttr", "{}">:$mapping)>, 2136 OpBuilder<(ins "Value":$target, 2137 "ArrayRef<OpFoldResult>":$mixedTileSizes, 2138 CArg<"::mlir::transform::TileSizesSpec", 2139 "::mlir::transform::TileSizesSpec()">, 2140 CArg<"ArrayAttr", "{}">:$mapping)>, 2141 OpBuilder<(ins "Value":$target, 2142 "ArrayRef<int64_t>":$staticNumThreads, 2143 CArg<"::mlir::transform::NumThreadsSpec", 2144 "::mlir::transform::NumThreadsSpec()">, 2145 CArg<"ArrayAttr", "{}">:$mapping)>, 2146 OpBuilder<(ins "Value":$target, 2147 "ArrayRef<OpFoldResult>":$mixedNumThreads, 2148 CArg<"::mlir::transform::NumThreadsSpec", 2149 "::mlir::transform::NumThreadsSpec()">, 2150 CArg<"ArrayAttr", "{}">:$mapping)> 2151 ]; 2152 2153 let assemblyFormat = [{ 2154 $target oilist( 2155 `num_threads` custom<PackedOrDynamicIndexList>($packed_num_threads, 2156 $num_threads, 2157 $static_num_threads) | 2158 `tile_sizes` custom<PackedOrDynamicIndexList>($packed_tile_sizes, 2159 $tile_sizes, 2160 $static_tile_sizes)) 2161 (`(` `mapping` `=` $mapping^ `)`)? 
attr-dict 2162 `:` functional-type(operands, results) 2163 }]; 2164 let hasVerifier = 1; 2165 2166 let extraClassDeclaration = [{ 2167 ::mlir::DiagnosedSilenceableFailure apply( 2168 ::mlir::transform::TransformRewriter &rewriter, 2169 ::mlir::transform::TransformResults &transformResults, 2170 ::mlir::transform::TransformState &state); 2171 2172 ::llvm::SmallVector<::mlir::OpFoldResult> getMixedNumThreads(); 2173 ::llvm::SmallVector<::mlir::OpFoldResult> getMixedTileSizes(); 2174 }]; 2175} 2176 2177//===----------------------------------------------------------------------===// 2178// VectorizeChildrenAndApplyPatternsOp 2179//===----------------------------------------------------------------------===// 2180 2181def VectorizeChildrenAndApplyPatternsOp : 2182 Op<Transform_Dialect, "structured.vectorize_children_and_apply_patterns", 2183 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2184 TransformEachOpTrait, TransformOpInterface, 2185 ReportTrackingListenerFailuresOpTrait]> { 2186 let description = [{ 2187 Vectorizes all children contained in the given `target` using the 2188 configuration specified by the attributes of this op. This only vectorizes 2189 structured ops that operate on shaped types and does not vectorize loops or 2190 straight-line. Internally, it applies a set of rewrite patterns, some of 2191 which enable vectorization and some of which clean up the results. 2192 Therefore, it can only be applied to an op with the "isolated from above" 2193 property. This transformation only fails if the entire pattern rewriting 2194 failed, i.e., it does **not** fail when no ops were vectorized. 2195 2196 Finer granularity can be achieved either with the `VectorizeOp` for 2197 individual ops or by outlining the target part of the payload IR into, e.g., 2198 a function, performing this transformation, and inlining it back. 
2199 2200 Note that this transformation invalidates the handles to any payload IR 2201 operation that is contained inside the vectorization target. 2202 2203 This transformation supports the following attributes: 2204 - `vectorize_padding`: a `UnitAttr` to activate the vectorization of 2205 `tensor.pad` ops. Different pipelines may prefer to lower such ops to 2206 loops. 2207 - `disable_multi_reduction_to_contract_patterns`: a `UnitAttr` to deactivate 2208 the rewrite of `vector.multi_reduction` to `vector.contract`. This is 2209 intended to be used in tests only. 2210 - `disable_transfer_permutation_map_lowering_patterns`: a `UnitAttr` to 2211 deactivate the rewrite of `vector.transfer` with permutation maps into 2212 explicit `vector.transpose` operations. This is intended to be used in 2213 tests only but may be promoted to a first class attribute in the future. 2214 2215 #### Return modes: 2216 2217 This operation produces a definite failure if vectorization fails for any 2218 reason. 2219 The operation always returns the handle to the target op that is expected 2220 to be isolated from above. 
2221 }]; 2222 2223 let arguments = (ins TransformHandleTypeInterface:$target, 2224 UnitAttr:$vectorize_padding, 2225 UnitAttr:$vectorize_nd_extract, 2226 UnitAttr:$flatten_1d_depthwise_conv, 2227 UnitAttr:$disable_multi_reduction_to_contract_patterns, 2228 UnitAttr:$disable_transfer_permutation_map_lowering_patterns); 2229 let results = (outs TransformHandleTypeInterface:$transformed); 2230 2231 let assemblyFormat = 2232 "$target attr-dict `:`" 2233 "functional-type(operands, results)"; 2234 2235 let builders = [ 2236 OpBuilder<(ins "Value":$target, 2237 CArg<"bool", "false">:$vectorizePadding, 2238 CArg<"bool", "false">:$vectorizeNDExtract, 2239 CArg<"bool", "false">:$flatten1DDepthwise)> 2240 ]; 2241 let extraClassDeclaration = [{ 2242 ::mlir::DiagnosedSilenceableFailure applyToOne( 2243 ::mlir::transform::TransformRewriter &rewriter, 2244 ::mlir::Operation *target, 2245 ::mlir::transform::ApplyToEachResultList &results, 2246 ::mlir::transform::TransformState &state); 2247 }]; 2248} 2249 2250def VectorizeOp : Op<Transform_Dialect, "structured.vectorize", 2251 [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 2252 TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> { 2253 let description = [{ 2254 Vectorize the target ops, which must be Linalg ops. 2255 2256 Use the optional vector sizes to specify exactly what configuration the 2257 vectorizer should use. It will then use masked vectors of the specified 2258 size to enforce this configuration ("masked vectorization"). If no vector 2259 sizes are specified, the vectorizer will infer the shapes to use from the 2260 target Linalg ops ("regular vectorization"). 
More specifically: 2261 2262 ```mlir 2263 # Masked vectorization - vector sizes are specified explicitly 2264 transform.structured.vectorize %target vector_sizes [1, 4] : !transform.any_op 2265 # Regular vectorization - vector sizes are inferred from the target Op 2266 transform.structured.vectorize %target : !transform.any_op 2267 ``` 2268 2269 The vector sizes can be either static or dynamic (SSA values). In case of 2270 SSA values, the handle must be mapped to exactly one payload op with 2271 exactly one index-typed result. 2272 2273 Note: The input vector sizes must be bigger than or equal to their 2274 counterpart iteration space sizes. 2275 2276 Typically this operator should be applied to linalg operations that have 2277 already been tiled to the appropriate sizes. 2278 2279 #### Return modes: 2280 2281 This operation produces a silenceable failure if at least one target op is 2282 not a Linalg op or fails to vectorize. It produces a definite failure if 2283 the dynamic vector sizes (SSA values) do not satisfy the constraints 2284 mentioned above. 2285 }]; 2286 2287 let arguments = (ins TransformHandleTypeInterface:$target, 2288 Variadic<TransformAnyParamTypeOrAnyHandle>:$vector_sizes, 2289 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">: 2290 $static_vector_sizes, 2291 OptionalAttr<UnitAttr>:$vectorize_nd_extract, 2292 DefaultValuedOptionalAttr<DenseBoolArrayAttr, "{}">: 2293 $scalable_sizes); 2294 2295 let results = (outs); 2296 2297 // We use oilist here to elide the optional `vector_sizes` when empty list 2298 // is passed. 2299 let assemblyFormat = [{ 2300 $target oilist( 2301 `vector_sizes` custom<DynamicIndexList>( 2302 $vector_sizes, 2303 $static_vector_sizes, 2304 $scalable_sizes)) 2305 attr-dict 2306 `:` type($target)(`,`type($vector_sizes)^)? 2307 }]; 2308 2309 let hasVerifier = 1; 2310 2311 let extraClassDeclaration = [{ 2312 // TODO: applyToOne. 
2313 ::mlir::DiagnosedSilenceableFailure apply( 2314 ::mlir::transform::TransformRewriter &rewriter, 2315 ::mlir::transform::TransformResults &transformResults, 2316 ::mlir::transform::TransformState &state); 2317 2318 ::llvm::SmallVector<::mlir::OpFoldResult> getMixedVectorSizes(); 2319 }]; 2320} 2321 2322//===----------------------------------------------------------------------===// 2323// HoistRedundantVectorTransfersOp 2324//===----------------------------------------------------------------------===// 2325 2326def HoistRedundantVectorTransfersOp : 2327 Op<Transform_Dialect, "structured.hoist_redundant_vector_transfers", 2328 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2329 TransformEachOpTrait, TransformOpInterface, 2330 ReportTrackingListenerFailuresOpTrait]> { 2331 let description = [{ 2332 Hoist vector.transfer_read / vector.transfer_write pairs out of immediately 2333 enclosing scf::ForOp iteratively, if the following conditions are true: 2334 1. The 2 ops access the same memref with the same indices. 2335 2. All operands are invariant under the enclosing scf::ForOp. 2336 3. No uses of the memref either dominate the transfer_read or are 2337 dominated by the transfer_write (i.e. no aliasing between the write and 2338 the read across the loop) 2339 2340 WARNING: This hoisting does not model parallelism and is generally incorrect 2341 when used on distributed loops with memref semantics! 2342 TODO: obsolete and should be retired. 2343 2344 #### Return modes: 2345 2346 The operation always succeeds and returns a handle to the transformed 2347 function op. 
2348 }]; 2349 2350 let arguments = (ins TransformHandleTypeInterface:$target, 2351 UnitAttr:$verify_non_zero_trip); 2352 let results = (outs TransformHandleTypeInterface:$transformed); 2353 2354 let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; 2355 2356 let builders = [ 2357 OpBuilder<(ins "Value":$target, 2358 CArg<"bool", "false">:$verify_non_zero_trip)>, 2359 ]; 2360 let extraClassDeclaration = [{ 2361 ::mlir::DiagnosedSilenceableFailure applyToOne( 2362 ::mlir::transform::TransformRewriter &rewriter, 2363 ::mlir::func::FuncOp target, 2364 ::mlir::transform::ApplyToEachResultList &results, 2365 ::mlir::transform::TransformState &state); 2366 }]; 2367} 2368 2369//===----------------------------------------------------------------------===// 2370// HoistRedundantVectorBroadcastsOp 2371//===----------------------------------------------------------------------===// 2372 2373def HoistRedundantVectorBroadcastsOp : 2374 Op<Transform_Dialect, "structured.hoist_redundant_vector_broadcasts", 2375 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2376 TransformEachOpTrait, TransformOpInterface, 2377 ReportTrackingListenerFailuresOpTrait]> { 2378 let description = [{ 2379 Hoist vector.extract / vector.broadcasts pairs out of immediately 2380 enclosing scf::ForOp iteratively. 2381 2382 #### Return modes: 2383 2384 The operation always succeeds and returns a handle to the transformed 2385 function op. 
2386 }]; 2387 2388 let arguments = (ins TransformHandleTypeInterface:$target); 2389 let results = (outs TransformHandleTypeInterface:$transformed); 2390 2391 let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; 2392 2393 let builders = [ 2394 OpBuilder<(ins "Value":$target)>, 2395 ]; 2396 let extraClassDeclaration = [{ 2397 ::mlir::DiagnosedSilenceableFailure applyToOne( 2398 ::mlir::transform::TransformRewriter &rewriter, 2399 ::mlir::Operation *target, 2400 ::mlir::transform::ApplyToEachResultList &results, 2401 ::mlir::transform::TransformState &state); 2402 }]; 2403} 2404 2405//===----------------------------------------------------------------------===// 2406// ConvertConv2DToImg2ColOp 2407//===----------------------------------------------------------------------===// 2408 2409def ConvertConv2DToImg2ColOp : Op<Transform_Dialect, 2410 "structured.convert_conv2d_to_img2col", 2411 [FunctionalStyleTransformOpTrait, 2412 MemoryEffectsOpInterface, 2413 TransformOpInterface, 2414 TransformEachOpTrait, 2415 ReportTrackingListenerFailuresOpTrait]> { 2416 let description = [{ 2417 Convert linalg.conv_2d_xxx into linalg.generic (for img2col packing) 2418 and linalg.matmul. 2419 2420 A convolution operation can be written as a matrix-matrix multiplication by 2421 unfolding the cross-correlation between input and filter and explicitly copy 2422 overlapped sliding window inputs. 2423 2424 Consider 2D input X with single channel input and output and 2x2 filter W: 2425 ``` 2426 [x(0, 0) , x(0, 1) , ..., x(0, n) ] 2427 [x(1, 0) , x(1, 1) , ..., x(1, n) ] 2428 [. , . ,. , . ] [w(0, 0), w(0, 1)] 2429 [. , . , . , . ] (conv) [w(1, 0), w(1, 1)] 2430 [. , . , ., . ] 2431 [x(n-1, 0), x(n-1, 1), ..., x(n-1, n-1)] 2432 ``` 2433 2434 The packed input data (img2col) is a matrix with |rows| = output spatial 2435 size, |columns| = filter spatial size. 
To compute the output Y(i, j) we need 2436 to calculate the dot product between filter window at input X(x, y) and the 2437 filter which will look like the following where r.h.s is the img2col matrix 2438 and l.h.s is the flattened filter: 2439 ``` 2440 [x(0,0), x(0,1), x(1,0), x(1,1)] 2441 [x(0,1), x(1,1), x(0,2), x(1,2)] (matmul) [w(0,0), w(0,1), w(1,0), w(1,1)] 2442 [x(0,1), x(1,1), x(0,2), x(1,2)] 2443 [ . , . , . , . ] 2444 ``` 2445 2446 In general for 2D case with (N, H, W, C) input and (Kh, Kw, C, D) filter 2447 and output (N, Ho, Wo, D) the convolution is the following matrix-matrix 2448 multiplication (Ho x Wo, Kh x Kw x C) * (Kh x Kw x C, D) for each input in 2449 the N input. For the case where N > 1 it's a batched matrix-matrix 2450 multiplication. 2451 2452 Returns two handles: 2453 - One on the operation that produces the img2col tensor. 2454 - One on the final operation of the sequence that replaces the original 2455 convolution. 2456 2457 #### Return modes: 2458 2459 Returns a definite failure if target is not isolated from above. 2460 Returns a silenceable failure if the pattern application failed. 
2461 }]; 2462 2463 let arguments = (ins TransformHandleTypeInterface:$target); 2464 let results = (outs TransformHandleTypeInterface:$img2col_tensor, 2465 TransformHandleTypeInterface:$transformed); 2466 2467 let assemblyFormat = 2468 "$target attr-dict `:` functional-type($target, results)"; 2469 2470 let builders = [ 2471 OpBuilder<(ins "Value":$target)> 2472 ]; 2473 2474 let extraClassDeclaration = [{ 2475 ::mlir::DiagnosedSilenceableFailure applyToOne( 2476 ::mlir::transform::TransformRewriter &rewriter, 2477 ::mlir::linalg::LinalgOp target, 2478 ::mlir::transform::ApplyToEachResultList &results, 2479 ::mlir::transform::TransformState &state); 2480 }]; 2481} 2482 2483//===----------------------------------------------------------------------===// 2484// FlattenElementwiseLinalgOp 2485//===----------------------------------------------------------------------===// 2486 2487def FlattenElementwiseLinalgOp : Op<Transform_Dialect, 2488 "structured.flatten_elementwise", 2489 [FunctionalStyleTransformOpTrait, 2490 MemoryEffectsOpInterface, 2491 TransformOpInterface, 2492 TransformEachOpTrait, 2493 ReportTrackingListenerFailuresOpTrait]> { 2494 let description = [{ 2495 Flattens the iteration space and (applicable) operands of elementwise 2496 linalg ops to a single dimension. 2497 2498 Returns one handle: 2499 - Flattened linalg operation. 2500 2501 #### Return modes: 2502 2503 Returns a definite failure if target is not isolated from above. 2504 Returns a silenceable failure if the pattern application failed. 
2505 }]; 2506 2507 let arguments = (ins TransformHandleTypeInterface:$target); 2508 let results = (outs TransformHandleTypeInterface:$transformed); 2509 2510 let assemblyFormat = 2511 "$target attr-dict `:` functional-type($target, results)"; 2512 2513 let builders = [ 2514 OpBuilder<(ins "Value":$target)> 2515 ]; 2516 2517 let extraClassDeclaration = [{ 2518 ::mlir::DiagnosedSilenceableFailure applyToOne( 2519 ::mlir::transform::TransformRewriter &rewriter, 2520 ::mlir::linalg::LinalgOp target, 2521 ::mlir::transform::ApplyToEachResultList &results, 2522 ::mlir::transform::TransformState &state); 2523 }]; 2524} 2525 2526//===----------------------------------------------------------------------===// 2527// Transpose Conv2D 2528//===----------------------------------------------------------------------===// 2529 2530def TransposeConv2DOp : Op<Transform_Dialect, 2531 "structured.transpose_conv2d", 2532 [FunctionalStyleTransformOpTrait, 2533 MemoryEffectsOpInterface, 2534 TransformOpInterface, 2535 TransformEachOpTrait, 2536 ReportTrackingListenerFailuresOpTrait]> { 2537 let description = [{ 2538 Convert linalg.conv_2d_nhwc_fhwc into linalg.conv_2d_nhwc_hwcf by introducing 2539 a linalg.transpose on the filter tensor/memref. 2540 2541 Whilst the fhwc filter channel ordering can be desirable for certain targets 2542 and is a more direct mapping to higher level dialects such as TOSA (which only 2543 supports this ordering) hwcf is better suited for transformations such as 2544 img2col which can make use of optimized BLAS routines such as GEMM. 2545 2546 Returns one handle: 2547 - The final operation of the sequence that replaces the original 2548 convolution. 2549 2550 #### Return modes: 2551 2552 Returns a definite failure if target is not isolated from above. 2553 Returns a silenceable failure if the pattern application failed. 
2554 }]; 2555 2556 let arguments = (ins TransformHandleTypeInterface:$target); 2557 let results = (outs TransformHandleTypeInterface:$transformed); 2558 2559 let assemblyFormat = 2560 "$target attr-dict `:` functional-type($target, results)"; 2561 2562 let builders = [ 2563 OpBuilder<(ins "Value":$target)> 2564 ]; 2565 2566 let extraClassDeclaration = [{ 2567 ::mlir::DiagnosedSilenceableFailure applyToOne( 2568 ::mlir::transform::TransformRewriter &rewriter, 2569 ::mlir::linalg::LinalgOp target, 2570 ::mlir::transform::ApplyToEachResultList &results, 2571 ::mlir::transform::TransformState &state); 2572 }]; 2573} 2574 2575//===----------------------------------------------------------------------===// 2576// TransposeMatmulOp 2577//===----------------------------------------------------------------------===// 2578 2579def TransposeMatmulOp : Op<Transform_Dialect, 2580 "structured.transpose_matmul", 2581 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2582 TransformOpInterface, TransformEachOpTrait, 2583 ReportTrackingListenerFailuresOpTrait]> { 2584 let description = [{ 2585 Convert Linalg matmul ops to transposed variants. 2586 2587 By default the LHS matrix is transposed. Specify `<rhs>` to instead 2588 transpose RHS matrix. 2589 2590 #### Return modes: 2591 2592 This operation fails if `target` is unsupported, i.e., not a 2593 `linalg.matmul` or `linalg.batch_matmul`. Otherwise, the operation succeeds 2594 and returns a handle to the transposed matmul op. 2595 }]; 2596 2597 let arguments = (ins 2598 TransformHandleTypeInterface:$target, 2599 DefaultValuedAttr<TransposeMatmulInput, 2600 "TransposeMatmulInput::lhs">:$inputToTranspose); 2601 let results = (outs TransformHandleTypeInterface:$transformed); 2602 2603 let assemblyFormat = [{ 2604 $target (`<` $inputToTranspose^ `>`)? 
attr-dict `:` functional-type($target, results) 2606 }]; 2607 2608 let builders = [ 2609 OpBuilder<(ins "Value":$target)> 2610 ]; 2611 2612 let extraClassDeclaration = [{ 2613 ::mlir::DiagnosedSilenceableFailure applyToOne( 2614 ::mlir::transform::TransformRewriter &rewriter, 2615 ::mlir::linalg::LinalgOp target, 2616 ::mlir::transform::ApplyToEachResultList &results, 2617 ::mlir::transform::TransformState &state); 2618 }]; 2619} 2620 2621//===----------------------------------------------------------------------===// 2622// InsertSliceToCopyOp 2623//===----------------------------------------------------------------------===// 2624 2625def InsertSliceToCopyOp : 2626 Op<Transform_Dialect, "structured.insert_slice_to_copy", 2627 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2628 TransformEachOpTrait, TransformOpInterface]> { 2629 let description = [{ 2630 Targeted rewrite of a tensor.insert_slice to linalg.copy. 2631 This is useful to materialize copies explicitly before bufferization and 2632 transform them, avoiding the need to rediscover them after bufferization. 2633 2634 If the insert_slice source is already a linalg.copy, only return the source 2635 op (i.e. do not create an additional linalg.copy op). 2636 2637 #### Return modes: 2638 2639 The operation always succeeds and returns a handle to the relevant 2640 linalg.copy op. 
2641 }]; 2642 2643 let arguments = (ins TransformHandleTypeInterface:$target); 2644 let results = (outs TransformHandleTypeInterface:$transformed); 2645 2646 let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; 2647 2648 let builders = [ 2649 OpBuilder<(ins "Value":$target)>, 2650 ]; 2651 let extraClassDeclaration = [{ 2652 ::mlir::DiagnosedSilenceableFailure applyToOne( 2653 ::mlir::transform::TransformRewriter &rewriter, 2654 ::mlir::Operation *target, 2655 ::mlir::transform::ApplyToEachResultList &results, 2656 ::mlir::transform::TransformState &state); 2657 }]; 2658} 2659 2660//===----------------------------------------------------------------------===// 2661// MapCopyToThreadsOp 2662//===----------------------------------------------------------------------===// 2663 2664def MapCopyToThreadsOp : 2665 Op<Transform_Dialect, "structured.gpu.map_copy_to_threads", 2666 [FunctionalStyleTransformOpTrait, 2667 MemoryEffectsOpInterface, 2668 TransformEachOpTrait, 2669 TransformOpInterface]> { 2670 let description = [{ 2671 Targeted mapping of a linalg.copy / tensor.pad operation on tensors to a GPU 2672 thread mapping. 2673 2674 This operation implements a greedy heuristic that determines a good 2675 distribution of threads to break down the copy/pad operation into. 2676 The heuristic is driven by considerations related to the underlying 2677 architecture for which good high-level decisions are needed assuming certain 2678 hardware features. Relevant features are exposed via first-class attributes 2679 to control the behavior of the transformation at a high level. 2680 2681 For now, a single heuristic is implemented and can be extended on a per-need 2682 basis. 2683 2684 #### Return modes 2685 2686 This operation fails definitely if there is an unsupported op (i.e., not 2687 linalg.copy / tensor.pad) among the targeted op. 
Otherwise, the operation 2688 always succeeds and returns a handle to the relevant tiled linalg.copy / 2689 tensor.pad op and the enclosing scf.forall op. 2690 }]; 2691 2692 let arguments = (ins TransformHandleTypeInterface:$target, 2693 I64Attr:$total_num_threads, 2694 I64Attr:$desired_bit_alignment); 2695 let results = (outs TransformHandleTypeInterface:$forall_op, 2696 TransformHandleTypeInterface:$tiled_op); 2697 2698 let assemblyFormat = [{ 2699 $target 2700 `total_num_threads` `=` $total_num_threads 2701 `desired_bit_alignment` `=` $desired_bit_alignment 2702 attr-dict 2703 `:` functional-type(operands, results) 2704 }]; 2705 2706 let builders = [ 2707 OpBuilder<(ins "Value":$target)>, 2708 ]; 2709 let extraClassDeclaration = [{ 2710 ::mlir::DiagnosedSilenceableFailure applyToOne( 2711 ::mlir::transform::TransformRewriter &rewriter, 2712 ::mlir::Operation *target, 2713 ::mlir::transform::ApplyToEachResultList &results, 2714 ::mlir::transform::TransformState &state); 2715 2716 ::llvm::SmallVector<::mlir::OpFoldResult> getMixedNumThreads(); 2717 }]; 2718} 2719 2720//===----------------------------------------------------------------------===// 2721// Winograd Conv2D 2722//===----------------------------------------------------------------------===// 2723 2724def WinogradConv2DOp : Op<Transform_Dialect, 2725 "structured.winograd_conv2d", 2726 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2727 TransformOpInterface, TransformEachOpTrait, 2728 ReportTrackingListenerFailuresOpTrait]> { 2729 let description = [{ 2730 Winograd Conv2D algorithm will convert linalg Conv2D operation into batched 2731 matrix multiply. Before the matrix multiply, it will convert filter and 2732 input into a format suitable for batched matrix multiply. After the matrix 2733 multiply, it will convert output to the final result tensor. 2734 2735 The algorithm F(m x m, r x r) is 2736 2737 Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A 2738 2739 The size of output Y is m x m. 
The size of filter g is r x r. The size of 2740 input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are 2741 transformation matrices. 2742 2743 #### Return modes: 2744 2745 This operation produces a silenceable failure if `target` is unsupported. 2746 Otherwise, the operation succeeds and returns a handle of the sequence that 2747 replaces the original convolution. 2748 }]; 2749 2750 let arguments = (ins TransformHandleTypeInterface:$target, 2751 I64Attr:$m, 2752 I64Attr:$r); 2753 let results = (outs TransformHandleTypeInterface:$transformed); 2754 2755 let assemblyFormat = 2756 "$target attr-dict `:` functional-type($target, results)"; 2757 2758 let builders = [ 2759 OpBuilder<(ins "Value":$target)> 2760 ]; 2761 2762 let extraClassDeclaration = [{ 2763 ::mlir::DiagnosedSilenceableFailure applyToOne( 2764 ::mlir::transform::TransformRewriter &rewriter, 2765 ::mlir::linalg::LinalgOp target, 2766 ::mlir::transform::ApplyToEachResultList &results, 2767 ::mlir::transform::TransformState &state); 2768 }]; 2769} 2770 2771def DecomposeWinogradOp : Op<Transform_Dialect, 2772 "structured.decompose_winograd_op", 2773 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, 2774 TransformOpInterface, TransformEachOpTrait, 2775 ReportTrackingListenerFailuresOpTrait]> { 2776 let description = [{ 2777 Decompose winograd operations. It will convert filter, input and output 2778 transform operations into a combination of scf, tensor, and linalg 2779 equivalent operations. Before applying this transform operation, users 2780 need to tile winograd transform operations into supported sizes. 2781 2782 #### Return modes: 2783 2784 This operation fails if `target` is unsupported. Otherwise, the operation 2785 succeeds and returns a handle of the sequence that replaces the original 2786 operations. 
2787 }]; 2788 2789 let arguments = (ins TransformHandleTypeInterface:$target); 2790 let results = (outs TransformHandleTypeInterface:$transformed); 2791 2792 let assemblyFormat = 2793 "$target attr-dict `:` functional-type($target, results)"; 2794 2795 let builders = [ 2796 OpBuilder<(ins "Value":$target)> 2797 ]; 2798 2799 let extraClassDeclaration = [{ 2800 ::mlir::DiagnosedSilenceableFailure applyToOne( 2801 ::mlir::transform::TransformRewriter &rewriter, 2802 ::mlir::Operation *target, 2803 ::mlir::transform::ApplyToEachResultList &results, 2804 ::mlir::transform::TransformState &state); 2805 }]; 2806} 2807 2808#endif // LINALG_TRANSFORM_OPS 2809