//===-- Passes.td - Bufferization passes definition file ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES
#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES

include "mlir/Pass/PassBase.td"

def BufferDeallocation : Pass<"buffer-deallocation", "func::FuncOp"> {
  let summary = "Adds all required dealloc operations for all allocations in "
                "the input program";
  let description = [{
    This pass implements an algorithm to automatically introduce all required
    deallocation operations for all buffers in the input program. This ensures
    that the resulting program does not have any memory leaks.

    Input

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:
        cf.br ^bb3(%arg1 : memref<2xf32>)
      ^bb2:
        %0 = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]}
        outs(%arg1, %0 : memref<2xf32>, memref<2xf32>) {
        ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
          %tmp1 = math.exp %gen1_arg0 : f32
          linalg.yield %tmp1 : f32
        }
        cf.br ^bb3(%0 : memref<2xf32>)
      ^bb3(%1: memref<2xf32>):
        "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
        return
      }
    }
    ```

    Output

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:  // pred: ^bb0
        %0 = memref.alloc() : memref<2xf32>
        memref.copy %arg1, %0 : memref<2xf32> to memref<2xf32>
        cf.br ^bb3(%0 : memref<2xf32>)
      ^bb2:  // pred: ^bb0
        %1 = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]}
        outs(%arg1, %1 : memref<2xf32>, memref<2xf32>) {
        ^bb0(%arg3: f32, %arg4: f32):
          %4 = math.exp %arg3 : f32
          linalg.yield %4 : f32
        }
        %2 = memref.alloc() : memref<2xf32>
        memref.copy %1, %2 : memref<2xf32> to memref<2xf32>
        memref.dealloc %1 : memref<2xf32>
        cf.br ^bb3(%2 : memref<2xf32>)
      ^bb3(%3: memref<2xf32>):  // 2 preds: ^bb1, ^bb2
        memref.copy %3, %arg2 : memref<2xf32> to memref<2xf32>
        memref.dealloc %3 : memref<2xf32>
        return
      }
    }
    ```
  }];
  let constructor = "mlir::bufferization::createBufferDeallocationPass()";
}

def OwnershipBasedBufferDeallocation : Pass<
    "ownership-based-buffer-deallocation"> {
  let summary = "Adds all required dealloc operations for all allocations in "
                "the input program";
  let description = [{
    This pass implements an algorithm to automatically introduce all required
    deallocation operations for all buffers in the input program. This ensures
    that the resulting program does not have any memory leaks.

    The Buffer Deallocation pass operates on the level of operations
    implementing the FunctionOpInterface. Such operations can take MemRefs as
    arguments, but also return them. To ensure compatibility among all functions
    (including external ones), some rules have to be enforced. For external
    functions, these rules are simply assumed to hold. Functions whose
    definition is available should ideally already adhere to the ABI.
    Otherwise, all MemRef write operations in the input IR must dominate all
    MemRef read operations in the input IR. Then, the pass may modify the input
    IR by inserting `bufferization.clone` operations such that the output IR
    adheres to the function boundary ABI:
    * When a MemRef is passed as a function argument, ownership is never
      acquired. It is always the caller's responsibility to deallocate such
      MemRefs.
    * Returning a MemRef from a function always passes ownership to the caller,
      i.e., it is also the caller's responsibility to deallocate MemRefs
      returned from a called function.
    * A function must not return a MemRef with the same allocated base buffer as
      one of its arguments (in this case a copy has to be created). Note that in
      this context two subviews of the same buffer that don't overlap are also
      considered an alias.

    It is recommended to bufferize all operations first such that no tensor
    values remain in the IR once this pass is applied. That way all allocated
    MemRefs will be properly deallocated without any additional manual work.
    Otherwise, the pass that bufferizes the remaining tensors is responsible for
    adding the corresponding deallocation operations. Note that this pass does
    not consider any values of tensor type and assumes that MemRef values
    defined by `bufferization.to_memref` do not return ownership and do not have
    to be deallocated. `bufferization.to_tensor` operations are handled
    similarly to `bufferization.clone` operations with the exception that the
    result value is not handled because it's a tensor (not a MemRef).

    Input

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1,
                            %arg1: memref<2xf32>,
                            %arg2: memref<2xf32>) {
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:
        cf.br ^bb3(%arg1 : memref<2xf32>)
      ^bb2:
        %0 = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]}
        outs(%arg1, %0 : memref<2xf32>, memref<2xf32>) {
        ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
          %tmp1 = math.exp %gen1_arg0 : f32
          linalg.yield %tmp1 : f32
        }
        cf.br ^bb3(%0 : memref<2xf32>)
      ^bb3(%1: memref<2xf32>):
        "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
        return
      }
    }
    ```

    Output

    ```mlir
    #map = affine_map<(d0) -> (d0)>
    module {
      func.func @condBranch(%arg0: i1,
                            %arg1: memref<2xf32>,
                            %arg2: memref<2xf32>) {
        %false = arith.constant false
        %true = arith.constant true
        cf.cond_br %arg0, ^bb1, ^bb2
      ^bb1:  // pred: ^bb0
        cf.br ^bb3(%arg1, %false : memref<2xf32>, i1)
      ^bb2:  // pred: ^bb0
        %alloc = memref.alloc() : memref<2xf32>
        linalg.generic {
          indexing_maps = [#map, #map],
          iterator_types = ["parallel"]}
        outs(%arg1, %alloc : memref<2xf32>, memref<2xf32>) {
        ^bb0(%out: f32, %out_0: f32):
          %2 = math.exp %out : f32
          linalg.yield %2, %out_0 : f32, f32
        }
        cf.br ^bb3(%alloc, %true : memref<2xf32>, i1)
      ^bb3(%0: memref<2xf32>, %1: i1):  // 2 preds: ^bb1, ^bb2
        memref.copy %0, %arg2 : memref<2xf32> to memref<2xf32>
        %base_buffer, %offset, %sizes, %strides =
          memref.extract_strided_metadata %0 :
          memref<2xf32> -> memref<f32>, index, index, index
        bufferization.dealloc (%base_buffer : memref<f32>) if (%1)
        return
      }
    }
    ```

    The `private-function-dynamic-ownership` pass option allows the pass to add
    additional arguments to private functions to dynamically give ownership of
    MemRefs to callees. This can enable earlier deallocations and allows the
    pass to bypass the function boundary ABI, potentially leading to fewer
    MemRef clones being inserted. For example, the private function
    ```mlir
    func.func private @passthrough(%memref: memref<2xi32>) -> memref<2xi32> {
      return %memref : memref<2xi32>
    }
    ```
    would be converted to
    ```mlir
    func.func private @passthrough(%memref: memref<2xi32>,
                                   %ownership: i1) -> (memref<2xi32>, i1) {
      return %memref, %ownership : memref<2xi32>, i1
    }
    ```
    and thus allows the returned MemRef to alias the MemRef passed as an
    argument (which would otherwise be forbidden according to the function
    boundary ABI).
  }];
  let options = [
    Option<"privateFuncDynamicOwnership", "private-function-dynamic-ownership",
           "bool", /*default=*/"false",
           "Allows adding additional arguments to private functions to "
           "dynamically pass ownership of memrefs to callees. This can enable "
           "earlier deallocations.">,
  ];
  let constructor = "mlir::bufferization::createOwnershipBasedBufferDeallocationPass()";

  let dependentDialects = [
    "mlir::bufferization::BufferizationDialect", "mlir::arith::ArithDialect",
    "mlir::memref::MemRefDialect", "mlir::scf::SCFDialect"
  ];
}

def BufferDeallocationSimplification :
    Pass<"buffer-deallocation-simplification"> {
  let summary = "Optimizes `bufferization.dealloc` operations for more "
                "efficient codegen";
  let description = [{
    This pass uses static alias analysis to reduce the number of alias checks
    required at runtime. Such checks are sometimes necessary to make sure that
    memrefs aren't deallocated before their last usage (use after free) or that
    some memref isn't deallocated twice (double free).
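
    For example (a minimal, hypothetical sketch; the actual rewrite patterns
    are more involved), if the analysis proves that a retained memref cannot
    alias the deallocated one, the runtime aliasing check can be folded away:

    ```mlir
    // Before: whether %arg0 receives ownership must be checked at runtime.
    %owned = bufferization.dealloc (%alloc : memref<2xf32>) if (%cond)
                 retain (%arg0 : memref<2xf32>)

    // After (assuming %alloc and %arg0 are proven not to alias): no runtime
    // aliasing check remains and the updated ownership becomes a constant.
    bufferization.dealloc (%alloc : memref<2xf32>) if (%cond)
    %owned = arith.constant false
    ```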
  }];

  let constructor =
    "mlir::bufferization::createBufferDeallocationSimplificationPass()";

  let dependentDialects = [
    "mlir::bufferization::BufferizationDialect", "mlir::arith::ArithDialect",
    "mlir::memref::MemRefDialect"
  ];
}

def OptimizeAllocationLiveness
    : Pass<"optimize-allocation-liveness", "func::FuncOp"> {
  let summary = "Optimizes the liveness of temporary allocations in the "
                "input function";
  let description = [{
    This pass finds all operations that have a memory allocation effect,
    searches for the corresponding deallocation, and moves the deallocation
    right after the last user of the allocation. This shortens the live range
    of the allocation.
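
    For example (a hypothetical sketch using unregistered test ops):

    ```mlir
    // Input: %alloc stays live across unrelated work.
    %alloc = memref.alloc() : memref<8xf32>
    "test.use"(%alloc) : (memref<8xf32>) -> ()   // last user of %alloc
    "test.unrelated_work"() : () -> ()
    memref.dealloc %alloc : memref<8xf32>

    // Output: the dealloc is moved directly after the last user.
    %alloc = memref.alloc() : memref<8xf32>
    "test.use"(%alloc) : (memref<8xf32>) -> ()
    memref.dealloc %alloc : memref<8xf32>
    "test.unrelated_work"() : () -> ()
    ```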

    The pass is expected to run after the deallocation pipeline.
  }];
  let constructor =
      "mlir::bufferization::createOptimizeAllocationLivenessPass()";
  let dependentDialects = ["mlir::memref::MemRefDialect"];
}

def LowerDeallocations : Pass<"bufferization-lower-deallocations"> {
  let summary = "Lowers `bufferization.dealloc` operations to `memref.dealloc` "
                "operations";
  let description = [{
    This pass lowers `bufferization.dealloc` operations to the `memref` dialect.
    It can be applied to a `builtin.module` or operations implementing the
    `FunctionOpInterface`. For the latter, only simple `dealloc` operations can
    be lowered because the library function necessary for the fully generic
    lowering cannot be inserted. In that case, an error is emitted.
    In addition to `memref.dealloc` operations, it may also emit operations from
    the `arith`, `scf`, and `func` dialects to build conditional deallocations
    and library functions that avoid code-size blow-up.
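
    For example, a simple case with a single memref and no retained values,
    such as

    ```mlir
    bufferization.dealloc (%buf : memref<f32>) if (%cond)
    ```

    may be lowered to a guarded `memref.dealloc` along these lines (a sketch;
    the exact IR depends on the input):

    ```mlir
    scf.if %cond {
      memref.dealloc %buf : memref<f32>
    }
    ```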
  }];

  let constructor =
    "mlir::bufferization::createLowerDeallocationsPass()";

  let dependentDialects = [
    "arith::ArithDialect", "memref::MemRefDialect", "scf::SCFDialect",
    "func::FuncDialect"
  ];
}

def BufferHoisting : Pass<"buffer-hoisting", "func::FuncOp"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "into common dominators and out of nested regions";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    into common dominators and out of nested regions.
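
    For example (a hypothetical sketch using an unregistered test op), an
    allocation inside a nested region is moved into the enclosing block:

    ```mlir
    // Input: the allocation lives in a nested region.
    scf.if %cond {
      %alloc = memref.alloc() : memref<8xf32>
      "test.use"(%alloc) : (memref<8xf32>) -> ()
    }

    // Output: the allocation is placed in the common dominator.
    %alloc = memref.alloc() : memref<8xf32>
    scf.if %cond {
      "test.use"(%alloc) : (memref<8xf32>) -> ()
    }
    ```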
  }];
  let constructor = "mlir::bufferization::createBufferHoistingPass()";
}

def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "func::FuncOp"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "out of loop nests";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    out of loop nests. It does not move allocations into common dominators.
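
    For example (a hypothetical sketch using an unregistered test op):

    ```mlir
    // Input: a fresh buffer is allocated in every iteration.
    scf.for %i = %lb to %ub step %step {
      %tmp = memref.alloc() : memref<16xf32>
      "test.use"(%tmp) : (memref<16xf32>) -> ()
    }

    // Output: the allocation is moved out of the loop nest.
    %tmp = memref.alloc() : memref<16xf32>
    scf.for %i = %lb to %ub step %step {
      "test.use"(%tmp) : (memref<16xf32>) -> ()
    }
    ```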
  }];
  let constructor = "mlir::bufferization::createBufferLoopHoistingPass()";
}

def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp"> {
  let summary = "Converts memref-typed function results to out-params";
  let description = [{
    Some calling conventions prefer to pass output memrefs as "out params". The
    conversion to this calling convention must be done as an atomic
    transformation of the entire program (hence this is a module pass).

    For example, if a call is rewritten, the callee needs to be rewritten as
    well; otherwise the IR ends up invalid. Thus, this transformation requires
    an atomic change to the entire program (e.g., the whole module).

    This pass is expected to run immediately after bufferization is finished.
    At that point, tensor-typed results will have been converted to memref-typed
    results, and can be consistently converted to out params.

    All memref-typed results are appended to the function argument list.
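
    For example, a callee such as the following hypothetical sketch

    ```mlir
    func.func @producer() -> memref<8xf32> {
      %alloc = memref.alloc() : memref<8xf32>
      // ... fill %alloc ...
      return %alloc : memref<8xf32>
    }
    ```

    is rewritten so that the result becomes an out-param; callers then allocate
    the buffer and pass it in:

    ```mlir
    func.func @producer(%out: memref<8xf32>) {
      %alloc = memref.alloc() : memref<8xf32>
      // ... fill %alloc ...
      memref.copy %alloc, %out : memref<8xf32> to memref<8xf32>
      return
    }
    ```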

    The main issue with this pass (and the out-param calling convention) is that
    buffers for results need to be allocated in the caller. This currently only
    works for statically shaped memrefs.

    If the `hoist-static-allocs` option is set, the pass tries to eliminate the
    allocation for the returned memref and to avoid the memory copy if possible.
    This optimization applies to returned memrefs that have a static shape and
    are allocated by `memref.alloc` inside the function. The allocated memref is
    then replaced with the memref passed in as the corresponding function
    argument.
  }];
  let options = [
    Option<"addResultAttribute", "add-result-attr", "bool",
       /*default=*/"false",
       "Add the attribute 'bufferize.result' to all output parameters.">,
    Option<"hoistStaticAllocs", "hoist-static-allocs",
       "bool", /*default=*/"false",
       "Hoist static allocations to call sites.">,
  ];
  let constructor = "mlir::bufferization::createBufferResultsToOutParamsPass()";
  let dependentDialects = ["memref::MemRefDialect"];
}

def DropEquivalentBufferResults : Pass<"drop-equivalent-buffer-results", "ModuleOp"> {
  let summary = "Remove MemRef return values that are equivalent to a bbArg";
  let description = [{
    This pass removes MemRef return values from functions if they are equivalent
    to a function bbArg. In that case, the return value is redundant and the
    respective CallOp operand can be used at the call site.

    Note: If a bbArg buffer is not returned directly but only a cast of it is
    returned, the buffer is still considered equivalent.
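
    For example (a hypothetical sketch), the result of the following function
    is equivalent to its bbArg and is therefore dropped; call sites then use
    their original operand instead of the call result:

    ```mlir
    // Before:
    func.func @foo(%m: memref<?xf32>) -> memref<?xf32> {
      // ... ops writing into %m ...
      return %m : memref<?xf32>
    }

    // After:
    func.func @foo(%m: memref<?xf32>) {
      // ... ops writing into %m ...
      return
    }
    ```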
  }];
  let constructor = "mlir::bufferization::createDropEquivalentBufferResultsPass()";
  let dependentDialects = ["memref::MemRefDialect"];
}

def EmptyTensorToAllocTensor : Pass<"empty-tensor-to-alloc-tensor"> {
  let summary = "Replace all empty ops by alloc_tensor ops.";
  let description = [{
    `tensor.empty` ops return a tensor of unspecified contents whose only
    purpose is to carry the tensor shape. This pass converts such ops to
    `bufferization.alloc_tensor` ops, which bufferize to buffer allocations.
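
    For example:

    ```mlir
    // Before:
    %0 = tensor.empty() : tensor<10xf32>

    // After:
    %0 = bufferization.alloc_tensor() : tensor<10xf32>
    ```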
  }];
  let constructor = "mlir::bufferization::createEmptyTensorToAllocTensorPass()";
  let dependentDialects = ["tensor::TensorDialect"];
}

def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
  let summary = "One-Shot Bufferize";
  let description = [{
    This pass bufferizes all ops that implement `BufferizableOpInterface`. It
    first performs an inplacability analysis on SSA use-def chains of tensor
    values to determine which OpOperands may bufferize in-place, i.e., without
    inserting a buffer copy. It then rewrites the IR, inserting a buffer
    allocation and copy for each OpOperand that was decided to bufferize
    out-of-place.

    One-Shot Bufferize (and `BufferizableOpInterface`) was designed for ops that
    are in destination-passing style. When bufferizing such ops, it is possible
    to reuse the buffer of a tensor OpOperand for a tensor OpResult. In essence,
    a possible destination of an operation is already passed as an SSA value.

    `tensor.insert` is an example of an op in destination-passing style. E.g.,
    when bufferizing `%t0 = tensor.insert %f into %dest[%idx]`, `buffer(%t0)` is
    identical to `buffer(%dest)` in the absence of RaW conflicts. As a
    counterexample, `tensor.generate` is not in destination-passing style and
    always results in a new buffer allocation.
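
    A rough sketch of the in-place case (assuming no RaW conflicts; the buffer
    name is hypothetical):

    ```mlir
    // Tensor IR (destination-passing style):
    %t0 = tensor.insert %f into %dest[%idx] : tensor<10xf32>

    // Possible bufferized form: a store into the existing buffer of %dest
    // (here called %dest_buffer), with no new allocation and no copy.
    memref.store %f, %dest_buffer[%idx] : memref<10xf32, strided<[?], offset: ?>>
    ```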

    One-Shot Bufferize does not deallocate any buffers that it allocates. The
    `-buffer-deallocation` pass should be run after One-Shot Bufferize to insert
    the deallocation operations necessary to eliminate memory leaks.

    One-Shot Bufferize will by default reject IR that contains non-bufferizable
    ops, i.e., ops that do not implement `BufferizableOpInterface`. Such IR can
    be allowed with `allow-unknown-ops=1`. In that case, `to_memref` and
    `to_tensor` ops will be generated at the bufferization boundary. This is
    useful for compatibility with existing partial bufferization passes: These
    can bufferize the remaining IR after running One-Shot Bufferize.
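
    For illustration (a hypothetical sketch; the exact ops and types depend on
    the input and the chosen options), an unknown op producing a tensor may be
    bridged into the bufferized IR roughly as follows:

    ```mlir
    // Input ("test.source" is not bufferizable):
    %t = "test.source"() : () -> tensor<4xf32>
    %r = tensor.extract %t[%c0] : tensor<4xf32>

    // Possible output with allow-unknown-ops=1: the unknown op keeps operating
    // on tensors and a to_memref op marks the bufferization boundary.
    %t = "test.source"() : () -> tensor<4xf32>
    %m = bufferization.to_memref %t : memref<4xf32, strided<[?], offset: ?>>
    %r = memref.load %m[%c0] : memref<4xf32, strided<[?], offset: ?>>
    ```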

    Note: Running One-Shot Bufferize after a partial bufferization pass is
    currently not supported. Running partial bufferization passes after running
    One-Shot Bufferize is supported and is the recommended way to gradually
    migrate from partial bufferization to One-Shot Bufferize.

    With `dialect-filter`, bufferization can be restricted to a set of dialects.
    If no filter is specified, all ops that implement `BufferizableOpInterface`
    are bufferized. Ops from the `std` dialect are an exception: These ops are
    always ignored, even if no filter is specified. When specifying a dialect
    filter and `allow-unknown-ops` is not turned on, bufferization fails when
    encountering an op that is not included in the filter (even if it is
    bufferizable).

    One-Shot Bufferize will by default assume memref types with fully dynamic
    layout maps when a precise layout cannot be inferred. E.g., this is the case
    when wrapping a non-bufferizable op in to_memref/to_tensor ops. This
    behavior can be overridden with `unknown-type-conversion`. Valid values are
    `fully-dynamic-layout-map` and `identity-layout-map`.

    For testing/debugging purposes, `test-analysis-only=1 print-conflicts=1`
    prints analysis results and explains why an OpOperand was decided to
    bufferize out-of-place. This is useful for understanding why One-Shot
    Bufferize chose to insert a certain buffer copy.

    `bufferize-function-boundaries` is an experimental flag for bufferizing
    `FuncOp`, `ReturnOp` and `CallOp`. This feature is still under development
    and supports only simple cases at the moment. In particular:

    * Recursive or circular function call graphs are not supported.
    * External functions (without bodies) that return a tensor are not
      supported.
    * Functions with multiple blocks or multiple ReturnOps are not supported.
    * Layout maps on function signatures can be controlled with a separate
      `function-boundary-type-conversion` option, which is similar to
      `unknown-type-conversion` but supports an additional `infer-layout-map`
      option. `fully-dynamic-layout-map` and `identity-layout-map` ensure that
      function signatures bufferize to easily predictable types, potentially at
      the cost of additional casts and copies, respectively. When layout maps
      are inferred, function return types may be more precise, but less
      predictable. Function argument types cannot be inferred and always have
      fully dynamic layout maps with `infer-layout-map`. A small signature
      sketch follows this list.
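
    For example (a hypothetical sketch), with `bufferize-function-boundaries=1`
    and `function-boundary-type-conversion=identity-layout-map`, a function
    signature such as

    ```mlir
    func.func @foo(%t: tensor<8xf32>) -> tensor<8xf32>
    ```

    may bufferize to

    ```mlir
    func.func @foo(%m: memref<8xf32>) -> memref<8xf32>
    ```

    whereas `fully-dynamic-layout-map` would produce memref types with a fully
    dynamic strided layout instead.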

    One-Shot Bufferize implements the following contract around function calls:
    The buffer of function arguments is always writable (unless annotated with
    `bufferization.writable = false`). A buffer copy may be inserted at the call
    site where necessary. Alias sets and equivalence info are propagated through
    function calls. Whenever a function is bufferized, all other functions that
    it calls have already been analyzed and bufferized, so exact alias and
    equivalence information is available. This is why recursive function calls
    are not yet supported.

    One-Shot Bufferize gathers additional information during the analysis phase
    when function boundary bufferization is activated. E.g., whether a function
    argument is read/written and which returned values are aliasing/equivalent.
    For debugging purposes, such information can be printed with
    `test-analysis-only`.

    The order in which ops are analyzed is important. The analysis is greedy and
    ops that are analyzed earlier are more likely to bufferize in-place. The
    heuristic can be set with `analysis-heuristic`. At the moment, the following
    heuristics are available:

    * `bottom-up` (default): Analyze ops from bottom to top.
    * `top-down`: Analyze ops from top to bottom.
    * `fuzzer`: Randomize the ordering of ops with `analysis-fuzzer-seed`.
    * `bottom-up-from-terminators`: Traverse the reverse use-def chains of
      tensor IR, starting from region branch terminators (bottom-up). Nested
      regions are traversed before enclosing regions. Analyze the traversed ops
      first, then analyze the remaining ops bottom-up. This heuristic is useful
      for bufferizing loop constructs. One-Shot Bufferize currently supports
      only such IR where yielded tensor values bufferize to equivalent region
      iter_args, and first analyzing all ops on the path from the "yielding" op
      to the beginning of the loop body makes it more likely for the region
      iter_args and yielded values to bufferize to equivalent buffers.
  }];
  let options = [
    Option<"allowReturnAllocsFromLoops", "allow-return-allocs-from-loops",
           "bool", /*default=*/"false",
           "Allows returning/yielding new allocations from a loop.">,
    Option<"allowUnknownOps", "allow-unknown-ops", "bool",
           /*default=*/"false",
           "Allows unknown (not bufferizable) ops in the input IR.">,
    Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
           /*default=*/"0",
           "Test only: Analyze ops in random order with a given seed (fuzzer)">,
    Option<"analysisHeuristic", "analysis-heuristic", "std::string",
           /*default=*/"\"bottom-up\"",
           "Heuristic that controls the IR traversal during analysis">,
    Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
           "bool", /*default=*/"0",
           "Bufferize function boundaries (experimental).">,
    Option<"checkParallelRegions", "check-parallel-regions", "bool",
           /*default=*/"true", "Account for parallel regions in RaW analysis.">,
    Option<"copyBeforeWrite", "copy-before-write", "bool", /*default=*/"false",
           "Skip the analysis. Make a buffer copy on every write.">,
    ListOption<"dialectFilter", "dialect-filter", "std::string",
               "Restrict bufferization to ops from these dialects.">,
    Option<"dumpAliasSets", "dump-alias-sets", "bool", /*default=*/"false",
           "Test only: Annotate tensor IR with alias sets">,
    ListOption<"noAnalysisFuncFilter", "no-analysis-func-filter", "std::string",
               "Skip analysis of functions with these symbol names. "
               "Set copyBeforeWrite to true when bufferizing them.">,
    Option<"functionBoundaryTypeConversion",
           "function-boundary-type-conversion", "std::string",
           /*default=*/"\"infer-layout-map\"",
           "Controls layout maps when bufferizing function signatures.">,
    Option<"mustInferMemorySpace", "must-infer-memory-space", "bool",
           /*default=*/"false",
           "The memory space of memref types must always be inferred. If "
           "unset, a default memory space of 0 is used.">,
    Option<"useEncodingForMemorySpace", "use-encoding-for-memory-space", "bool",
            /*default=*/"false",
            "Use the tensor encoding attribute for the memory space. Mutually "
            "exclusive with the 'must-infer-memory-space' option.">,
    Option<"testAnalysisOnly", "test-analysis-only", "bool",
            /*default=*/"false",
           "Test only: Only run inplaceability analysis and annotate IR">,
    Option<"printConflicts", "print-conflicts", "bool",
            /*default=*/"false",
           "Test only: Annotate IR with RaW conflicts. Requires "
           "test-analysis-only.">,
    Option<"unknownTypeConversion", "unknown-type-conversion", "std::string",
           /*default=*/"\"fully-dynamic-layout-map\"",
           "Controls layout maps for non-inferrable memref types.">,
    Option<"bufferAlignment", "buffer-alignment", "uint64_t", /*default=*/"64",
           "Sets the alignment of newly allocated buffers.">,
  ];
  let constructor = "mlir::bufferization::createOneShotBufferizePass()";

  let statistics = [
    Statistic<"numBufferAlloc", "num-buffer-alloc",
              "Number of buffer allocations">,
    Statistic<"numTensorInPlace", "num-tensor-in-place",
              "Number of in-place tensor OpOperands">,
    Statistic<"numTensorOutOfPlace", "num-tensor-out-of-place",
              "Number of out-of-place tensor OpOperands">,
  ];
}

def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "func::FuncOp"> {
  let summary = "Promotes heap-based allocations to automatically managed "
                "stack-based allocations";
  let description = [{
    This pass implements a simple algorithm to convert heap-based memory
    allocations to stack-based ones. It uses a built-in heuristic to decide
    whether it makes sense to convert an allocation. Furthermore, dynamically
    shaped buffers can be converted as long as their rank does not exceed the
    configured limit; buffers are only transformed if they are considered to be
    small.
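
    For example:

    ```mlir
    // Before: heap allocation.
    %0 = memref.alloc() : memref<16xf32>

    // After: stack allocation (if the size heuristic allows it).
    %0 = memref.alloca() : memref<16xf32>
    ```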
  }];
  let constructor = "mlir::bufferization::createPromoteBuffersToStackPass()";
  let options = [
    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
           /*default=*/"1024",
           "Maximal size in bytes to promote allocations to stack.">,
    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
           /*default=*/"1",
           "Maximal memref rank to promote dynamic buffers.">,
  ];
}

def EmptyTensorElimination : Pass<"eliminate-empty-tensors"> {
  let summary = "Try to eliminate all tensor.empty ops.";
  let description = [{
    This pass tries to eliminate "tensor.empty" ops inside the targeted op. The
    transformation looks for subset ops that insert a tensor that originates
    from a "tensor.empty" (as per the reverse use-def chain). Such
    "tensor.empty" ops are replaced with the destination subset.

    E.g.:
    ```
    %0 = tensor.empty() : tensor<10xf32>
    %1 = linalg.fill ... outs(%0 : tensor<10xf32>)
    %2 = tensor.insert_slice %1 into %t ...
    ```

    In the above example, the subset op is "tensor.insert_slice". When tracing
    back the reverse use-def chain of the source, we end up at a
    "tensor.empty" op. The "tensor.empty" op is replaced with a
    "tensor.extract_slice" op.
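
    The resulting IR for the example above could then look as follows (a
    sketch, mirroring the ellipses of the input):

    ```
    %0 = tensor.extract_slice %t ...
    %1 = linalg.fill ... outs(%0 : tensor<10xf32>)
    %2 = tensor.insert_slice %1 into %t ...
    ```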
  }];
  let constructor = "mlir::bufferization::createEmptyTensorEliminationPass()";
}

#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES