//===-- Passes.td - Bufferization passes definition file ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES
#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES

include "mlir/Pass/PassBase.td"

def BufferDeallocation : Pass<"buffer-deallocation", "func::FuncOp"> {
  let summary = "Adds all required dealloc operations for all allocations in "
                "the input program";
  let description = [{
    This pass implements an algorithm to automatically introduce all required
    deallocation operations for all buffers in the input program. This ensures
    that the resulting program does not have any memory leaks.

    Input

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:
        cf.br ^bb3(%arg1 : memref<2xf32>)
      ^bb2:
        %0 = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]}
        outs(%arg1, %0 : memref<2xf32>, memref<2xf32>) {
        ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
          %tmp1 = math.exp %gen1_arg0 : f32
          linalg.yield %tmp1, %gen1_arg1 : f32, f32
        }
        cf.br ^bb3(%0 : memref<2xf32>)
      ^bb3(%1: memref<2xf32>):
        "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
        return
      }
    }
    ```

    Output

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:  // pred: ^bb0
        %0 = memref.alloc() : memref<2xf32>
        memref.copy %arg1, %0 : memref<2xf32> to memref<2xf32>
        cf.br ^bb3(%0 : memref<2xf32>)
      ^bb2:  // pred: ^bb0
        %1 = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]}
        outs(%arg1, %1 : memref<2xf32>, memref<2xf32>) {
        ^bb0(%arg3: f32, %arg4: f32):
          %4 = math.exp %arg3 : f32
          linalg.yield %4, %arg4 : f32, f32
        }
        %2 = memref.alloc() : memref<2xf32>
        memref.copy %1, %2 : memref<2xf32> to memref<2xf32>
        memref.dealloc %1 : memref<2xf32>
        cf.br ^bb3(%2 : memref<2xf32>)
      ^bb3(%3: memref<2xf32>):  // 2 preds: ^bb1, ^bb2
        memref.copy %3, %arg2 : memref<2xf32> to memref<2xf32>
        memref.dealloc %3 : memref<2xf32>
        return
      }
    }
    ```
  }];
  let constructor = "mlir::bufferization::createBufferDeallocationPass()";
}

def OwnershipBasedBufferDeallocation : Pass<
    "ownership-based-buffer-deallocation"> {
  let summary = "Adds all required dealloc operations for all allocations in "
                "the input program";
  let description = [{
    This pass implements an algorithm to automatically introduce all required
    deallocation operations for all buffers in the input program. This ensures
    that the resulting program does not have any memory leaks.

    The Buffer Deallocation pass operates on the level of operations
    implementing the FunctionOpInterface. Such operations can take MemRefs as
    arguments, but also return them. To ensure compatibility among all functions
    (including external ones), some rules have to be enforced. They are just
    assumed to hold for all external functions. Functions for which the
    definition is available ideally also already adhere to the ABI.
    Otherwise, all MemRef write operations in the input IR must dominate all
    MemRef read operations in the input IR. Then, the pass may modify the input
    IR by inserting `bufferization.clone` operations such that the output IR
    adheres to the function boundary ABI:
    * When a MemRef is passed as a function argument, ownership is never
      acquired. It is always the caller's responsibility to deallocate such
      MemRefs.
    * Returning a MemRef from a function always passes ownership to the caller,
      i.e., it is also the caller's responsibility to deallocate MemRefs
      returned from a called function.
    * A function must not return a MemRef with the same allocated base buffer as
      one of its arguments (in this case a copy has to be created). Note that in
      this context two non-overlapping subviews of the same buffer are also
      considered aliases.

    It is recommended to bufferize all operations first such that no tensor
    values remain in the IR once this pass is applied. That way all allocated
    MemRefs will be properly deallocated without any additional manual work.
    Otherwise, the pass that bufferizes the remaining tensors is responsible
    for adding the corresponding deallocation operations. Note that this pass
    does not consider any values of tensor type and assumes that MemRef values
    defined by `bufferization.to_memref` do not return ownership and do not
    have to be deallocated. `bufferization.to_tensor` operations are handled
    similarly to `bufferization.clone` operations with the exception that the
    result value is not handled because it's a tensor (not a MemRef).

    Input

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1,
                            %arg1: memref<2xf32>,
                            %arg2: memref<2xf32>) {
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:
        cf.br ^bb3(%arg1 : memref<2xf32>)
      ^bb2:
        %0 = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]}
        outs(%arg1, %0 : memref<2xf32>, memref<2xf32>) {
        ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
          %tmp1 = math.exp %gen1_arg0 : f32
          linalg.yield %tmp1, %gen1_arg1 : f32, f32
        }
        cf.br ^bb3(%0 : memref<2xf32>)
      ^bb3(%1: memref<2xf32>):
        "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
        return
      }
    }
    ```

    Output

    ```mlir
    #map = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1,
                            %arg1: memref<2xf32>,
                            %arg2: memref<2xf32>) {
        %false = arith.constant false
        %true = arith.constant true
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:  // pred: ^bb0
        cf.br ^bb3(%arg1, %false : memref<2xf32>, i1)
      ^bb2:  // pred: ^bb0
        %alloc = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map, #map],
          iterator_types = ["parallel"]}
        outs(%arg1, %alloc : memref<2xf32>, memref<2xf32>) {
        ^bb0(%out: f32, %out_0: f32):
          %2 = math.exp %out : f32
          linalg.yield %2, %out_0 : f32, f32
        }
        cf.br ^bb3(%alloc, %true : memref<2xf32>, i1)
      ^bb3(%0: memref<2xf32>, %1: i1):  // 2 preds: ^bb1, ^bb2
        memref.copy %0, %arg2 : memref<2xf32> to memref<2xf32>
        %base_buffer, %offset, %sizes, %strides =
          memref.extract_strided_metadata %0 :
          memref<2xf32> -> memref<f32>, index, index, index
        bufferization.dealloc (%base_buffer : memref<f32>) if (%1)
        return
      }
    }
    ```

    The `private-function-dynamic-ownership` pass option allows the pass to add
    additional arguments to private functions to dynamically give ownership of
    MemRefs to callees. This can enable earlier deallocations and allows the
    pass to bypass the function boundary ABI, potentially leading to fewer
    MemRef clones being inserted. For example, the private function
    ```mlir
    func.func private @passthrough(%memref: memref<2xi32>) -> memref<2xi32> {
      return %memref : memref<2xi32>
    }
    ```
    would be converted to
    ```mlir
    func.func private @passthrough(%memref: memref<2xi32>,
                                   %ownership: i1) -> (memref<2xi32>, i1) {
      return %memref, %ownership : memref<2xi32>, i1
    }
    ```
    and thus allows the returned MemRef to alias with the MemRef passed as
    argument (which would otherwise be forbidden according to the function
    boundary ABI).
  }];
  let options = [
    Option<"privateFuncDynamicOwnership", "private-function-dynamic-ownership",
           "bool", /*default=*/"false",
           "Allows adding additional arguments to private functions to "
           "dynamically pass ownership of memrefs to callees. This can enable "
           "earlier deallocations.">,
  ];
  let constructor = "mlir::bufferization::createOwnershipBasedBufferDeallocationPass()";

  let dependentDialects = [
    "mlir::bufferization::BufferizationDialect", "mlir::arith::ArithDialect",
    "mlir::memref::MemRefDialect", "mlir::scf::SCFDialect"
  ];
}

def BufferDeallocationSimplification :
    Pass<"buffer-deallocation-simplification"> {
  let summary = "Optimizes `bufferization.dealloc` operations for more "
                "efficient codegen";
  let description = [{
    This pass uses static alias analysis to reduce the number of alias checks
    required at runtime. Such checks are sometimes necessary to make sure that
    memrefs aren't deallocated before their last usage (use after free) or that
    some memref isn't deallocated twice (double free).
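
    For example, a retained memref that is guaranteed not to alias any of the
    deallocated memrefs can be dropped from the `retain` list, and its updated
    ownership folds to a constant `false`. The following hand-written sketch
    illustrates this kind of rewrite (illustrative only, not verbatim pass
    output):

    ```mlir
    // Before: the retained %arg0 requires a runtime aliasing check against
    // %alloc.
    func.func @example(%arg0: memref<2xi32>, %cond: i1) -> i1 {
      %alloc = memref.alloc() : memref<2xi32>
      %0 = bufferization.dealloc (%alloc : memref<2xi32>) if (%cond)
             retain (%arg0 : memref<2xi32>)
      return %0 : i1
    }

    // After: a fresh allocation can never alias a function argument, so the
    // retain clause is removed and the updated ownership becomes a constant.
    func.func @example(%arg0: memref<2xi32>, %cond: i1) -> i1 {
      %false = arith.constant false
      %alloc = memref.alloc() : memref<2xi32>
      bufferization.dealloc (%alloc : memref<2xi32>) if (%cond)
      return %false : i1
    }
    ```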
  }];

  let constructor =
    "mlir::bufferization::createBufferDeallocationSimplificationPass()";

  let dependentDialects = [
    "mlir::bufferization::BufferizationDialect", "mlir::arith::ArithDialect",
    "mlir::memref::MemRefDialect"
  ];
}

def OptimizeAllocationLiveness
    : Pass<"optimize-allocation-liveness", "func::FuncOp"> {
  let summary = "This pass optimizes the liveness of temp allocations in the "
                "input function";
  let description = [{
    This pass finds all operations that have a memory allocation effect,
    searches for the corresponding deallocation, and moves the deallocation
    right after the last user of the allocation. This minimizes the live range
    of each allocation.

    The pass is expected to run after the deallocation pipeline.
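
    As an illustrative sketch (not verbatim pass output; the unregistered
    `test.use` op stands in for an arbitrary user):

    ```mlir
    // Before: %alloc is deallocated at the end of the function, so its live
    // range overlaps with the unrelated buffer %alloc_0.
    func.func @example(%arg0: memref<64xf32>) {
      %alloc = memref.alloc() : memref<64xf32>
      memref.copy %arg0, %alloc : memref<64xf32> to memref<64xf32>
      "test.use"(%alloc) : (memref<64xf32>) -> ()  // last user of %alloc
      %alloc_0 = memref.alloc() : memref<256xf32>
      "test.use"(%alloc_0) : (memref<256xf32>) -> ()
      memref.dealloc %alloc_0 : memref<256xf32>
      memref.dealloc %alloc : memref<64xf32>
      return
    }

    // After: the dealloc of %alloc is moved right after its last user, so
    // the live ranges of the two buffers no longer overlap.
    func.func @example(%arg0: memref<64xf32>) {
      %alloc = memref.alloc() : memref<64xf32>
      memref.copy %arg0, %alloc : memref<64xf32> to memref<64xf32>
      "test.use"(%alloc) : (memref<64xf32>) -> ()  // last user of %alloc
      memref.dealloc %alloc : memref<64xf32>
      %alloc_0 = memref.alloc() : memref<256xf32>
      "test.use"(%alloc_0) : (memref<256xf32>) -> ()
      memref.dealloc %alloc_0 : memref<256xf32>
      return
    }
    ```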
  }];
  let constructor =
      "mlir::bufferization::createOptimizeAllocationLivenessPass()";
  let dependentDialects = ["mlir::memref::MemRefDialect"];
}

def LowerDeallocations : Pass<"bufferization-lower-deallocations"> {
  let summary = "Lowers `bufferization.dealloc` operations to `memref.dealloc` "
                "operations";
  let description = [{
    This pass lowers `bufferization.dealloc` operations to the `memref` dialect.
    It can be applied to a `builtin.module` or operations implementing the
    `FunctionOpInterface`. For the latter, only simple `dealloc` operations can
    be lowered because the library function necessary for the fully generic
    lowering cannot be inserted. In this case, an error will be emitted.
    Next to `memref.dealloc` operations, it may also emit operations from the
    `arith`, `scf`, and `func` dialects to build conditional deallocations and
    library functions to avoid code-size blow-up.
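
    For instance, a simple `dealloc` with a single memref, a single condition,
    and no retained values can be lowered to a conditional `memref.dealloc`
    roughly as follows (a sketch; the exact output of the pass may differ):

    ```mlir
    bufferization.dealloc (%buf : memref<2xf32>) if (%cond)
    ```
    becomes
    ```mlir
    scf.if %cond {
      memref.dealloc %buf : memref<2xf32>
    }
    ```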
  }];

  let constructor =
    "mlir::bufferization::createLowerDeallocationsPass()";

  let dependentDialects = [
    "arith::ArithDialect", "memref::MemRefDialect", "scf::SCFDialect",
    "func::FuncDialect"
  ];
}

def BufferHoisting : Pass<"buffer-hoisting", "func::FuncOp"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "into common dominators and out of nested regions";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    into common dominators and out of nested regions.
  }];
  let constructor = "mlir::bufferization::createBufferHoistingPass()";
}

def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "func::FuncOp"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "out of loop nests";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    out of loop nests. It does not move allocations into common dominators.
  }];
  let constructor = "mlir::bufferization::createBufferLoopHoistingPass()";
}

def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp"> {
  let summary = "Converts memref-typed function results to out-params";
  let description = [{
    Some calling conventions prefer to pass output memrefs as "out params". The
    conversion to this calling convention must be done as an atomic
    transformation of the entire program (hence this is a module pass).

    For example, if a call is rewritten, the callee needs to be rewritten as
    well; otherwise the IR will end up invalid. Thus, this transformation
    requires an atomic change to the entire program (e.g. the whole module).

    This pass is expected to run immediately after bufferization is finished.
    At that point, tensor-typed results will have been converted to memref-typed
    results, and can be consistently converted to out params.

    All memref-typed results are appended to the function argument list.

    The main issue with this pass (and the out-param calling convention) is that
    buffers for results need to be allocated in the caller. This currently only
    works for statically shaped memrefs.

    If the hoist-static-allocs option is on, the pass tries to eliminate the
    allocation for the returned memref and to avoid the memory copy if
    possible. This optimization applies to returned memrefs that have a static
    shape and are allocated by memref.alloc in the function. In that case, the
    allocated memref is replaced with the memref given as a function argument.
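
    A sketch of the conversion (the unregistered `test.fill` op stands in for
    arbitrary code that populates the buffer):

    ```mlir
    // Before: the result buffer is allocated in the callee.
    func.func @create() -> memref<2xf32> {
      %alloc = memref.alloc() : memref<2xf32>
      "test.fill"(%alloc) : (memref<2xf32>) -> ()
      return %alloc : memref<2xf32>
    }

    // After: the result becomes an out param that the caller must allocate.
    func.func @create(%out: memref<2xf32>) {
      %alloc = memref.alloc() : memref<2xf32>
      "test.fill"(%alloc) : (memref<2xf32>) -> ()
      memref.copy %alloc, %out : memref<2xf32> to memref<2xf32>
      return
    }
    ```

    With `hoist-static-allocs`, the allocation and the copy can additionally be
    elided, and `%out` is used directly in place of `%alloc`.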
  }];
  let options = [
    Option<"addResultAttribute", "add-result-attr", "bool",
           /*default=*/"false",
           "Add the attribute 'bufferize.result' to all output parameters.">,
    Option<"hoistStaticAllocs", "hoist-static-allocs",
           "bool", /*default=*/"false",
           "Hoist static allocations to call sites.">,
  ];
  let constructor = "mlir::bufferization::createBufferResultsToOutParamsPass()";
  let dependentDialects = ["memref::MemRefDialect"];
}

def DropEquivalentBufferResults : Pass<"drop-equivalent-buffer-results", "ModuleOp"> {
  let summary = "Remove MemRef return values that are equivalent to a bbArg";
  let description = [{
    This pass removes MemRef return values from functions if they are equivalent
    to a function bbArg. In that case, the return value is redundant and the
    respective CallOp operand can be used at the call site.

    Note: If a bbArg buffer is not returned directly but casted beforehand, the
    buffer is still considered equivalent.
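
    A sketch of the rewrite (the unregistered `test.use` op is a placeholder
    for an arbitrary user):

    ```mlir
    // Before: the returned memref is equivalent to the bbArg %arg0.
    func.func @passthrough(%arg0: memref<?xf32>) -> memref<?xf32> {
      return %arg0 : memref<?xf32>
    }
    func.func @caller(%m: memref<?xf32>) {
      %0 = func.call @passthrough(%m) : (memref<?xf32>) -> memref<?xf32>
      "test.use"(%0) : (memref<?xf32>) -> ()
      return
    }

    // After: the redundant return value is dropped and the call site uses
    // the operand directly.
    func.func @passthrough(%arg0: memref<?xf32>) {
      return
    }
    func.func @caller(%m: memref<?xf32>) {
      func.call @passthrough(%m) : (memref<?xf32>) -> ()
      "test.use"(%m) : (memref<?xf32>) -> ()
      return
    }
    ```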
  }];
  let constructor = "mlir::bufferization::createDropEquivalentBufferResultsPass()";
  let dependentDialects = ["memref::MemRefDialect"];
}

def EmptyTensorToAllocTensor : Pass<"empty-tensor-to-alloc-tensor"> {
  let summary = "Replace all empty ops with alloc_tensor ops.";
  let description = [{
    tensor.empty ops return a tensor of unspecified contents whose only purpose
    is to carry the tensor shape. This pass converts such ops to
    bufferization.alloc_tensor ops, which bufferize to buffer allocations.
  }];
  let constructor = "mlir::bufferization::createEmptyTensorToAllocTensorPass()";
  let dependentDialects = ["tensor::TensorDialect"];
}

def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
  let summary = "One-Shot Bufferize";
  let description = [{
    This pass bufferizes all ops that implement `BufferizableOpInterface`. It
    first performs an inplaceability analysis on SSA use-def chains of tensor
    values to determine which OpOperands may bufferize in-place, i.e., without
    inserting a buffer copy. It then rewrites the IR, inserting a buffer
    allocation and copy for each OpOperand that was decided to bufferize
    out-of-place.

    One-Shot Bufferize (and `BufferizableOpInterface`) was designed for ops that
    are in destination-passing style. When bufferizing such ops, it is possible
    to reuse the buffer of a tensor OpOperand for a tensor OpResult. In essence,
    a possible destination of an operation is already passed as an SSA value.

    `tensor.insert` is an example of an op in destination-passing style. E.g.,
    when bufferizing `%t0 = tensor.insert %f into %dest[%idx]`, `buffer(%t0)` is
    identical to `buffer(%dest)` in the absence of RaW conflicts. As a
    counterexample, `tensor.generate` is not in destination-passing style and
    always results in a new buffer allocation.
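
    For example (a hand-written sketch; `%dest_buffer` denotes the buffer that
    the analysis assigned to `%dest`):

    ```mlir
    // Tensor IR in destination-passing style:
    %t0 = tensor.insert %f into %dest[%idx] : tensor<10xf32>

    // Possible bufferization when there is no RaW conflict: %t0 reuses the
    // buffer of %dest, so no new allocation or copy is needed.
    memref.store %f, %dest_buffer[%idx] : memref<10xf32>
    ```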
    One-Shot Bufferize does not deallocate any buffers that it allocates. The
    `-buffer-deallocation` pass should be run after One-Shot Bufferize to insert
    the deallocation operations necessary to eliminate memory leaks.

    One-Shot Bufferize will by default reject IR that contains non-bufferizable
    ops, i.e., ops that do not implement `BufferizableOpInterface`. Such IR can
    be allowed with `allow-unknown-ops=1`. In that case, to_memref and to_tensor
    ops will be generated at the bufferization boundary. This is useful for
    compatibility with existing partial bufferization passes: These can
    bufferize the remaining IR after running One-Shot Bufferize.

    Note: Running One-Shot Bufferize after a partial bufferization pass is
    currently not supported. Running partial bufferization passes after running
    One-Shot Bufferize is supported and is the recommended way to gradually
    migrate from partial bufferization to One-Shot Bufferize.

    With `dialect-filter`, bufferization can be restricted to a set of dialects.
    If no filter is specified, all ops that implement `BufferizableOpInterface`
    are bufferized. Ops from the `std` dialect are an exception: These ops are
    always ignored, even if no filter is specified. When specifying a dialect
    filter and `allow-unknown-ops` is not turned on, bufferization fails when
    encountering an op that is not included in the filter (even if it is
    bufferizable).

    One-Shot Bufferize will by default assume memref types with fully dynamic
    layout maps when a precise layout cannot be inferred. E.g., this is the case
    when wrapping a non-bufferizable op in to_memref/to_tensor ops. This
    behavior can be overridden with `unknown-type-conversion`. Valid values are
    `fully-dynamic-layout-map` and `identity-layout-map`.

    For testing/debugging purposes, `test-analysis-only=1 print-conflicts=1`
    prints analysis results and explains why an OpOperand was decided to
    bufferize out-of-place. This is useful for understanding why One-Shot
    Bufferize chose to insert a certain buffer copy.

    `bufferize-function-boundaries` is an experimental flag for bufferizing
    `FuncOp`, `ReturnOp` and `CallOp`. This feature is still under development
    and supports only simple cases at the moment. In particular:

    * Recursive or circular function call graphs are not supported.
    * External functions (without bodies) that return a tensor are not
      supported.
    * Functions with multiple blocks or multiple ReturnOps are not supported.
    * Layout maps on function signatures can be controlled with a separate
      `function-boundary-type-conversion` option, which is similar to
      `unknown-type-conversion` but supports an additional `infer-layout-map`
      option. `fully-dynamic-layout-map` and `identity-layout-map` ensure that
      function signatures bufferize to easily predictable types, potentially at
      the cost of additional casts and copies, respectively. When layout maps
      are inferred, function return types may be more precise, but less
      predictable. Function argument types cannot be inferred and always have
      fully dynamic layout maps with `infer-layout-map`.

    One-Shot Bufferize implements the following contract around function calls:
    The buffer of function arguments is always writable (unless annotated with
    `bufferization.writable = false`). A buffer copy may be inserted at the call
    site where necessary. Alias sets and equivalence info are propagated through
    function calls. Whenever a function is bufferized, all other functions that
    are being called were already analyzed and bufferized, so exact alias and
    equivalence information is available. This is why recursive function calls
    are not yet supported.

    One-Shot Bufferize gathers additional information during the analysis phase
    when function boundary bufferization is activated. E.g., whether a function
    argument is read/written and which returned values are aliasing/equivalent.
    For debugging purposes, such information can be printed with
    `test-analysis-only`.

    The order in which ops are analyzed is important. The analysis is greedy and
    ops that are analyzed earlier are more likely to bufferize in-place. The
    heuristic can be set with `analysis-heuristic`. At the moment, the following
    heuristics are available:

    * `bottom-up` (default): Analyze ops from bottom to top.
    * `top-down`: Analyze ops from top to bottom.
    * `fuzzer`: Randomize the ordering of ops with `analysis-fuzzer-seed`.
    * `bottom-up-from-terminators`: Traverse the reverse use-def chains of
      tensor IR, starting from region branch terminators (bottom-up). Nested
      regions are traversed before enclosing regions. Analyze the traversed ops
      first, then analyze the remaining ops bottom-up. This heuristic is useful
      for bufferizing loop constructs. One-Shot Bufferize currently supports
      only such IR where yielded tensor values bufferize to equivalent region
      iter_args, and first analyzing all ops on the path from the "yielding" op
      to the beginning of the loop body makes it more likely for the region
      iter_args and yielded values to bufferize to equivalent buffers.
  }];
  let options = [
    Option<"allowReturnAllocsFromLoops", "allow-return-allocs-from-loops",
           "bool", /*default=*/"false",
           "Allows returning/yielding new allocations from a loop.">,
    Option<"allowUnknownOps", "allow-unknown-ops", "bool",
           /*default=*/"false",
           "Allows unknown (not bufferizable) ops in the input IR.">,
    Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
           /*default=*/"0",
           "Test only: Analyze ops in random order with a given seed (fuzzer)">,
    Option<"analysisHeuristic", "analysis-heuristic", "std::string",
           /*default=*/"\"bottom-up\"",
           "Heuristic that controls the IR traversal during analysis">,
    Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
           "bool", /*default=*/"0",
           "Bufferize function boundaries (experimental).">,
    Option<"checkParallelRegions", "check-parallel-regions", "bool",
           /*default=*/"true", "Account for parallel regions in RaW analysis.">,
    Option<"copyBeforeWrite", "copy-before-write", "bool", /*default=*/"false",
           "Skip the analysis. Make a buffer copy on every write.">,
    ListOption<"dialectFilter", "dialect-filter", "std::string",
               "Restrict bufferization to ops from these dialects.">,
    Option<"dumpAliasSets", "dump-alias-sets", "bool", /*default=*/"false",
           "Test only: Annotate tensor IR with alias sets">,
    ListOption<"noAnalysisFuncFilter", "no-analysis-func-filter", "std::string",
               "Skip analysis of functions with these symbol names. "
               "Set copyBeforeWrite to true when bufferizing them.">,
    Option<"functionBoundaryTypeConversion",
           "function-boundary-type-conversion", "std::string",
           /*default=*/"\"infer-layout-map\"",
           "Controls layout maps when bufferizing function signatures.">,
    Option<"mustInferMemorySpace", "must-infer-memory-space", "bool",
           /*default=*/"false",
           "The memory space of memref types must always be inferred. If "
           "unset, a default memory space of 0 is used.">,
    Option<"useEncodingForMemorySpace", "use-encoding-for-memory-space", "bool",
           /*default=*/"false",
           "Use the Tensor encoding attribute for the memory space. Mutually "
           "exclusive with the 'must-infer-memory-space' option.">,
    Option<"testAnalysisOnly", "test-analysis-only", "bool",
           /*default=*/"false",
           "Test only: Only run inplaceability analysis and annotate IR">,
    Option<"printConflicts", "print-conflicts", "bool",
           /*default=*/"false",
           "Test only: Annotate IR with RaW conflicts. Requires "
           "test-analysis-only.">,
    Option<"unknownTypeConversion", "unknown-type-conversion", "std::string",
           /*default=*/"\"fully-dynamic-layout-map\"",
           "Controls layout maps for non-inferrable memref types.">,
    Option<"bufferAlignment", "buffer-alignment", "uint64_t", /*default=*/"64",
           "Sets the alignment of newly allocated buffers.">,
  ];
  let constructor = "mlir::bufferization::createOneShotBufferizePass()";

  let statistics = [
    Statistic<"numBufferAlloc", "num-buffer-alloc",
              "Number of buffer allocations">,
    Statistic<"numTensorInPlace", "num-tensor-in-place",
              "Number of in-place tensor OpOperands">,
    Statistic<"numTensorOutOfPlace", "num-tensor-out-of-place",
              "Number of out-of-place tensor OpOperands">,
  ];
}

def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "func::FuncOp"> {
  let summary = "Promotes heap-based allocations to automatically managed "
                "stack-based allocations";
  let description = [{
    This pass implements a simple algorithm to convert heap-based memory
    allocations to stack-based ones. It uses a built-in heuristic to decide
    whether it makes sense to convert an allocation. Furthermore, dynamically
    shaped buffers that are bounded by the rank of the tensor can be
    converted. They are only transformed if they are considered to be small.
  }];
  let constructor = "mlir::bufferization::createPromoteBuffersToStackPass()";
  let options = [
    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
           /*default=*/"1024",
           "Maximal size in bytes to promote allocations to stack.">,
    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
           /*default=*/"1",
           "Maximal memref rank to promote dynamic buffers.">,
  ];
}

def EmptyTensorElimination : Pass<"eliminate-empty-tensors"> {
  let summary = "Try to eliminate all tensor.empty ops.";
  let description = [{
    Try to eliminate "tensor.empty" ops inside `op`. This transformation looks
    for subset ops that insert a tensor that originates from a "tensor.empty"
    (as per the reverse use-def chain). Such "tensor.empty" ops are replaced
    with the destination subset.

    E.g.:
    ```
    %0 = tensor.empty() : tensor<10xf32>
    %1 = linalg.fill ... outs(%0 : tensor<10xf32>)
    %2 = tensor.insert_slice %1 into %t ...
    ```

    In the above example, the subset op is "tensor.insert_slice". When tracing
    back the reverse use-def chain of the source, we end up at a "tensor.empty"
    op. The "tensor.empty" op is replaced with a "tensor.extract_slice" op.
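
    After the transformation, the IR would look roughly as follows:

    ```
    %0 = tensor.extract_slice %t ...
    %1 = linalg.fill ... outs(%0 : tensor<10xf32>)
    %2 = tensor.insert_slice %1 into %t ...
    ```

    The "tensor.empty" op is gone, and the filled values now originate from a
    subset of the destination %t, so no separate allocation is needed for them
    during bufferization.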
  }];
  let constructor = "mlir::bufferization::createEmptyTensorEliminationPass()";
}

#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES