//===- OwnershipBasedBufferDeallocation.cpp - impl. for buffer dealloc. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct `bufferization.dealloc`
// positions. Furthermore, buffer deallocation also adds required new clone
// operations to ensure that memrefs returned by functions never alias an
// argument.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Iterators.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"

namespace mlir {
namespace bufferization {
#define GEN_PASS_DEF_OWNERSHIPBASEDBUFFERDEALLOCATION
#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
} // namespace bufferization
} // namespace mlir

using namespace mlir;
using namespace mlir::bufferization;

//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//

static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
  return builder.create<arith::ConstantOp>(loc, builder.getBoolAttr(value));
}

static bool isMemref(Value v) { return isa<BaseMemRefType>(v.getType()); }

/// Return "true" if the given op is guaranteed to have neither "Allocate" nor
/// "Free" side effects.
static bool hasNeitherAllocateNorFreeSideEffect(Operation *op) {
  if (isa<MemoryEffectOpInterface>(op))
    return !hasEffect<MemoryEffects::Allocate>(op) &&
           !hasEffect<MemoryEffects::Free>(op);
  // If the op does not implement the MemoryEffectOpInterface but has recursive
  // memory effects, then this op in isolation (without its body) does not have
  // any side effects. All the ops inside the regions of this op will be
  // processed separately.
  return op->hasTrait<OpTrait::HasRecursiveMemoryEffects>();
}

/// Return "true" if the given op has buffer semantics. I.e., it has buffer
/// operands, buffer results and/or buffer region entry block arguments.
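/// For example (illustration only), an operation such as
/// `memref.load %m[%i] : memref<4xf32>` has a memref operand and therefore
/// buffer semantics, whereas an operation that only works on tensors does not.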
static bool hasBufferSemantics(Operation *op) {
  if (llvm::any_of(op->getOperands(), isMemref) ||
      llvm::any_of(op->getResults(), isMemref))
    return true;
  for (Region &region : op->getRegions())
    if (!region.empty())
      if (llvm::any_of(region.front().getArguments(), isMemref))
        return true;
  return false;
}

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

namespace {

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
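///
/// For illustration only (such IR is rejected by this pass), a block that
/// branches back to itself forms such a backedge:
/// ```
/// ^bb1(%i: index):
///   cf.br ^bb1(%i : index)  // backedge ^bb1 -> ^bb1
/// ```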
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

namespace {
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation.
class BufferDeallocation {
public:
  BufferDeallocation(Operation *op, DeallocationOptions options)
      : state(op), options(options) {}

  /// Performs the actual placement/creation of all dealloc operations.
  LogicalResult deallocate(FunctionOpInterface op);

private:
  /// The base case for the recursive template below.
  template <typename... T>
  typename std::enable_if<sizeof...(T) == 0, FailureOr<Operation *>>::type
  handleOp(Operation *op) {
    return op;
  }

  /// Applies all the handlers of the interfaces in the template list
  /// implemented by 'op'. In particular, if an operation implements more than
  /// one of the interfaces in the template list, all the associated handlers
  /// will be applied to the operation in the same order as the template list
  /// specifies. If a handler reports a failure or removes the operation without
  /// replacement (indicated by returning 'nullptr'), no further handlers are
  /// applied and the return value is propagated to the caller of 'handleOp'.
  ///
  /// The interface handlers' job is to update the deallocation state, most
  /// importantly the ownership map and the list of memrefs to potentially be
  /// deallocated per block, but also to insert `bufferization.dealloc`
  /// operations where needed. Obviously, no MemRefs that may be used at a later
  /// point in the control flow may be deallocated, and the ownership map has to
  /// be updated to reflect potential ownership changes caused by the dealloc
  /// operation (e.g., if two interfaces on the same op insert a dealloc
  /// operation each, the second one should query the ownership map and use the
  /// resulting ownership indicators as deallocation condition such that MemRefs
  /// already deallocated in the first dealloc operation are not deallocated a
  /// second time (double-free)).
  /// Note that currently only the interfaces on terminators may insert dealloc
  /// operations and it is verified as a precondition that a terminator op must
  /// implement exactly one of the interfaces handling dealloc insertion.
  ///
  /// The return value of the 'handleInterface' functions should be a
  /// FailureOr<Operation *> indicating whether there was a failure or otherwise
  /// returning the operation itself or a replacement operation.
  ///
  /// Note: The difference compared to `TypeSwitch` is that all
  /// matching cases are applied instead of just the first match.
  template <typename InterfaceT, typename... InterfacesU>
  FailureOr<Operation *> handleOp(Operation *op) {
    Operation *next = op;
    if (auto concreteOp = dyn_cast<InterfaceT>(op)) {
      FailureOr<Operation *> result = handleInterface(concreteOp);
      if (failed(result))
        return failure();
      next = *result;
    }
    if (!next)
      return FailureOr<Operation *>(nullptr);
    return handleOp<InterfacesU...>(next);
  }

  /// Apply all supported interface handlers to the given op.
  FailureOr<Operation *> handleAllInterfaces(Operation *op) {
    if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(op))
      return deallocOpInterface.process(state, options);

    if (failed(verifyOperationPreconditions(op)))
      return failure();

    return handleOp<MemoryEffectOpInterface, RegionBranchOpInterface,
                    CallOpInterface, BranchOpInterface,
                    RegionBranchTerminatorOpInterface>(op);
  }

  /// Make sure that for each forwarded MemRef value, an ownership indicator
  /// `i1` value is forwarded as well such that the successor block knows
  /// whether the MemRef has to be deallocated.
  ///
  /// Example:
  /// ```
  /// ^bb1:
  ///   <more ops...>
  ///   cf.br ^bb2(<forward-to-bb2>)
  /// ```
  /// becomes
  /// ```
  /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
  /// // let r = getMemrefsToRetain(bb1, bb2, <forward-to-bb2>)
  /// ^bb1:
  ///   <more ops...>
  ///   o = bufferization.dealloc m if c retain r
  ///   // replace ownership(r) with o element-wise
  ///   cf.br ^bb2(<forward-to-bb2>, o)
  /// ```
  FailureOr<Operation *> handleInterface(BranchOpInterface op);

  /// Add an ownership indicator for every forwarding MemRef operand and result.
  /// Nested regions never take ownership of MemRefs owned by a parent region
  /// (neither via forwarding operand nor when captured implicitly when the
  /// region is not isolated from above). Ownerships will only be passed to peer
  /// regions (when an operation has multiple regions, such as scf.while), or to
  /// parent regions.
  /// Note that the block arguments in the nested region are currently handled
  /// centrally in the 'dealloc' function, but better interface support could
  /// allow us to do this here for the nested region specifically to reduce the
  /// amount of assumptions we make on the structure of ops implementing this
  /// interface.
  ///
  /// Example:
  /// ```
  /// %ret = scf.for %i = %c0 to %c10 step %c1 iter_args(%m = %memref) {
  ///   <more ops...>
  ///   scf.yield %m : memref<2xi32>
  /// }
  /// ```
  /// becomes
  /// ```
  /// %ret:2 = scf.for %i = %c0 to %c10 step %c1
  ///     iter_args(%m = %memref, %own = %false) {
  ///   <more ops...>
  ///   // Note that the scf.yield is handled by the
  ///   // RegionBranchTerminatorOpInterface (not this handler)
  ///   // let o = getMemrefWithUniqueOwnership(%own)
  ///   scf.yield %m, o : memref<2xi32>, i1
  /// }
  /// ```
  FailureOr<Operation *> handleInterface(RegionBranchOpInterface op);

  /// If the private-function-dynamic-ownership pass option is enabled and the
  /// called function is private, additional results are added for each MemRef
  /// result to pass the dynamic ownership indicator along. Otherwise, updates
  /// the ownership map and list of memrefs to be deallocated according to the
  /// function boundary ABI, i.e., assume ownership of all returned MemRefs.
  ///
  /// Example (assume `private-function-dynamic-ownership` is enabled):
  /// ```
  /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
  /// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
  ///
  /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
  /// %ret_g = func.call @g(%memref) : (memref<2xi32>) -> memref<2xi32>
  /// ```
  /// becomes
  /// ```
  /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
  /// func.func private @g(%arg0: memref<2xi32>) -> (memref<2xi32>, i1) {...}
  ///
  /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
  /// // set ownership(%ret_f) := true
  /// // remember to deallocate %ret_f
  ///
  /// %ret_g:2 = func.call @g(%memref) : (memref<2xi32>) -> (memref<2xi32>, i1)
  /// // set ownership(%ret_g#0) := %ret_g#1
  /// // remember to deallocate %ret_g if it comes with ownership
  /// ```
  FailureOr<Operation *> handleInterface(CallOpInterface op);

  /// Takes care of allocation and free side-effects. It collects allocated
  /// MemRefs that have to be deallocated later, but also removes values again
  /// that are already deallocated before the end of the block. It also updates
  /// the ownership map accordingly.
  ///
  /// Example:
  /// ```
  /// %alloc = memref.alloc()
  /// %alloca = memref.alloca()
  /// ```
  /// becomes
  /// ```
  /// %alloc = memref.alloc()
  /// %alloca = memref.alloca()
  /// // set ownership(alloc) := true
  /// // set ownership(alloca) := false
  /// // remember to deallocate %alloc
  /// ```
  FailureOr<Operation *> handleInterface(MemoryEffectOpInterface op);

  /// Takes care that the function boundary ABI is adhered to if the parent
  /// operation implements FunctionOpInterface, inserting a
  /// `bufferization.clone` if necessary, and inserts the
  /// `bufferization.dealloc` operation according to the op's operands.
  ///
  /// Example:
  /// ```
  /// ^bb1:
  ///   <more ops...>
  ///   func.return <return-vals>
  /// ```
  /// becomes
  /// ```
  /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
  /// // let r = getMemrefsToRetain(bb1, nullptr, <return-vals>)
  /// ^bb1:
  ///   <more ops...>
  ///   o = bufferization.dealloc m if c retain r
  ///   func.return <return-vals>
  ///     (if !isFunctionWithoutDynamicOwnership: append o)
  /// ```
  FailureOr<Operation *> handleInterface(RegionBranchTerminatorOpInterface op);

  /// Construct a new operation which is exactly the same as the passed 'op'
  /// except that the OpResults list is appended by new results of the passed
  /// 'types'.
  /// TODO: ideally, this would be implemented using an OpInterface because it
  /// is used to append function results, loop iter_args, etc. and thus makes
  /// some assumptions that the variadic list of those is at the end of the
  /// OpResults range.
  Operation *appendOpResults(Operation *op, ArrayRef<Type> types);

  /// A convenience template for the generic 'appendOpResults' function above to
  /// avoid manual casting of the result.
  template <typename OpTy>
  OpTy appendOpResults(OpTy op, ArrayRef<Type> types) {
    return cast<OpTy>(appendOpResults(op.getOperation(), types));
  }

  /// Performs deallocation of a single basic block. This is a private function
  /// because some internal data structures have to be set up beforehand and
  /// this function has to be called on blocks in a region in dominance order.
  LogicalResult deallocate(Block *block);

  /// After all relevant interfaces of an operation have been processed by the
  /// 'handleInterface' functions, this function sets the ownership of operation
  /// results that have not been set yet by the 'handleInterface' functions. It
  /// generally assumes that each result can alias with every operand of the
  /// operation; if there are MemRef typed results but no MemRef operands, it
  /// assigns 'false' as ownership. This happens, e.g., for the
  /// memref.get_global operation. It would also be possible to query some alias
  /// analysis to get more precise ownerships; however, the analysis would have
  /// to be updated according to the IR modifications this pass performs (e.g.,
  /// re-building operations to have more result values, inserting clone
  /// operations, etc.).
  void populateRemainingOwnerships(Operation *op);

  /// Given an SSA value of MemRef type, returns the same or a new SSA value
  /// which has 'Unique' ownership where the ownership indicator is guaranteed
  /// to be always 'true'.
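  ///
  /// A sketch of the IR this may emit when ownership is only known at runtime
  /// (value names are illustrative):
  /// ```
  /// %owned = scf.if %condition -> (memref<2xi32>) {
  ///   scf.yield %memref : memref<2xi32>
  /// } else {
  ///   %clone = bufferization.clone %memref : memref<2xi32> to memref<2xi32>
  ///   scf.yield %clone : memref<2xi32>
  /// }
  /// ```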
  Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
                                                 Value memref, Block *block);

  /// Returns whether the given operation implements FunctionOpInterface, has
  /// private visibility, and the private-function-dynamic-ownership pass option
  /// is enabled.
  bool isFunctionWithoutDynamicOwnership(Operation *op);

  /// Given an SSA value of MemRef type, this function queries the
  /// BufferDeallocationOpInterface of the defining operation of 'memref' for a
  /// materialized ownership indicator for 'memref'. If the op does not
  /// implement the interface or if the block for which the materialized value
  /// is requested does not match the block in which 'memref' is defined, the
  /// default implementation in
  /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
  std::pair<Value, Value>
  materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);

  /// Checks all the preconditions for operations implementing the
  /// FunctionOpInterface that have to hold for the deallocation to be
  /// applicable:
  /// (1) Checks that there are no explicit control flow loops.
  static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);

  /// Checks all the preconditions for operations inside the region of
  /// operations implementing the FunctionOpInterface that have to hold for the
  /// deallocation to be applicable:
  /// (1) Checks that all operations that have at least one attached region
  /// implement the RegionBranchOpInterface. This is not required in edge cases
  /// where we have a single attached region and the parent operation has no
  /// results.
  /// (2) Checks that no deallocations already exist. Especially deallocations
  /// in nested regions are not properly supported yet since this requires
  /// ownership of the memref to be transferred to the nested region, which does
  /// not happen by default. This constraint can be lifted in the future.
  /// (3) Checks that terminators with more than one successor except
  /// `cf.cond_br` are not present and that either BranchOpInterface or
  /// RegionBranchTerminatorOpInterface is implemented.
  static LogicalResult verifyOperationPreconditions(Operation *op);

  /// When the 'private-function-dynamic-ownership' pass option is enabled,
  /// additional `i1` return values are added for each MemRef result in the
  /// function signature. This function takes care of updating the
  /// `function_type` attribute of the function according to the actually
  /// returned values from the terminators.
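  ///
  /// Example (mirroring the `@g` example in the CallOpInterface handler
  /// documentation above):
  /// ```
  /// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32>
  /// ```
  /// becomes
  /// ```
  /// func.func private @g(%arg0: memref<2xi32>) -> (memref<2xi32>, i1)
  /// ```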
  static LogicalResult updateFunctionSignature(FunctionOpInterface op);

private:
  /// Collects all analysis state including liveness, caches, ownerships of
  /// already processed values and operations, and the MemRefs that have to be
  /// deallocated at the end of each block.
  DeallocationState state;

  /// Collects all pass options in a single place.
  DeallocationOptions options;
};

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocation Implementation
//===----------------------------------------------------------------------===//

std::pair<Value, Value>
BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
                                               Block *block) {
  // The interface can only materialize ownership indicators in the same block
  // as the defining op.
  if (memref.getParentBlock() != block)
    return state.getMemrefWithUniqueOwnership(builder, memref, block);

  Operation *owner = memref.getDefiningOp();
  if (!owner)
    owner = memref.getParentBlock()->getParentOp();

  // If the op implements the interface, query it for a materialized ownership
  // value.
  if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(owner))
    return deallocOpInterface.materializeUniqueOwnershipForMemref(
        state, options, builder, memref);

  // Otherwise use the default implementation.
  return state.getMemrefWithUniqueOwnership(builder, memref, block);
}

LogicalResult
BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
  // (1) Ensure that there are supported loops only (no explicit control flow
  // loops).
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  return success();
}

LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
  // We do not care about ops that do not operate on buffers and have no
  // Allocate/Free side effect.
  if (!hasBufferSemantics(op) && hasNeitherAllocateNorFreeSideEffect(op))
    return success();

  // (1) The pass does not work properly when deallocations are already present.
  // Alternatively, we could also remove all deallocations as a pre-pass.
  if (isa<DeallocOp>(op))
    return op->emitError(
        "No deallocation operations must be present when running this pass!");

  // (2) Memory side effects of unregistered ops are unknown. In particular, we
  // do not know whether an unregistered op allocates memory or not.
  // - Ops with recursive memory effects are allowed. All nested ops in the
  //   regions of `op` will be analyzed separately.
  // - Call ops are allowed even though they typically do not implement the
  //   MemoryEffectOpInterface. They usually do not have side effects apart
  //   from the callee, which will be analyzed separately. (This is similar to
  //   "recursive memory effects".)
  if (!isa<MemoryEffectOpInterface>(op) &&
      !op->hasTrait<OpTrait::HasRecursiveMemoryEffects>() &&
      !isa<CallOpInterface>(op))
    return op->emitError(
        "ops with unknown memory side effects are not supported");

  // (3) Check that the control flow structures are supported.
  auto regions = op->getRegions();
  // Check that if the operation has at least one region, it implements the
  // RegionBranchOpInterface. If there is an operation that does not fulfill
  // this condition, we cannot apply the deallocation steps. Furthermore, we
  // accept the case of a single attached region when the operation has no
  // results, since, in that case, the intra-region control flow does not
  // affect the transformation.
  size_t size = regions.size();
  if (((size == 1 && !op->getResults().empty()) || size > 1) &&
      !dyn_cast<RegionBranchOpInterface>(op)) {
    return op->emitError("All operations with attached regions need to "
                         "implement the RegionBranchOpInterface.");
  }

  // (4) Check that terminators with more than one successor except `cf.cond_br`
  // are not present and that either BranchOpInterface or
  // RegionBranchTerminatorOpInterface is implemented.
  if (op->hasTrait<OpTrait::NoTerminator>())
    return op->emitError("NoTerminator trait is not supported");

  if (op->hasTrait<OpTrait::IsTerminator>()) {
    // Either one of those interfaces has to be implemented on terminators, but
    // not both.
    if (!isa<BranchOpInterface, RegionBranchTerminatorOpInterface>(op) ||
        (isa<BranchOpInterface>(op) &&
         isa<RegionBranchTerminatorOpInterface>(op)))
      return op->emitError(
          "Terminators must implement either BranchOpInterface or "
          "RegionBranchTerminatorOpInterface (but not both)!");

    // We only support terminators with 0 or 1 successors for now and
    // special-case the conditional branch op.
    if (op->getSuccessors().size() > 1)
      return op->emitError("Terminators with more than one successor "
                           "are not supported!");
  }

  return success();
}

LogicalResult
BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
  SmallVector<TypeRange> returnOperandTypes(llvm::map_range(
      op.getFunctionBody().getOps<RegionBranchTerminatorOpInterface>(),
      [](RegionBranchTerminatorOpInterface op) {
        return op.getSuccessorOperands(RegionBranchPoint::parent()).getTypes();
      }));
  if (!llvm::all_equal(returnOperandTypes))
    return op->emitError(
        "there are multiple return operations with different operand types");

  TypeRange resultTypes = op.getResultTypes();
  // Check if we found a return operation because that doesn't necessarily
  // always have to be the case, e.g., consider a function with one block that
  // has a cf.br at the end branching to itself again (i.e., an infinite loop).
  // In that case we don't want to crash but just not update the return types.
  if (!returnOperandTypes.empty())
    resultTypes = returnOperandTypes[0];

  op.setFunctionTypeAttr(TypeAttr::get(FunctionType::get(
      op->getContext(), op.getFunctionBody().front().getArgumentTypes(),
      resultTypes)));

  return success();
}

LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
  // Stop and emit a proper error message if we don't support the input IR.
  if (failed(verifyFunctionPreconditions(op)))
    return failure();

  // Process the function block by block.
  auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
      [&](Block *block) {
        if (failed(deallocate(block)))
          return WalkResult::interrupt();
        return WalkResult::advance();
      });
  if (result.wasInterrupted())
    return failure();

  // Update the function signature if the function is private, dynamic ownership
  // is enabled, and the function has memrefs as arguments or results.
  return updateFunctionSignature(op);
}

LogicalResult BufferDeallocation::deallocate(Block *block) {
  OpBuilder builder = OpBuilder::atBlockBegin(block);

  // Compute liveness transfers of ownership to this block.
  SmallVector<Value> liveMemrefs;
  state.getLiveMemrefsIn(block, liveMemrefs);
  for (auto li : liveMemrefs) {
    // Ownership of implicitly captured memrefs from other regions is never
    // taken, but ownership of memrefs in the same region (but different block)
    // is taken.
    if (li.getParentRegion() == block->getParent()) {
      state.updateOwnership(li, state.getOwnership(li, li.getParentBlock()),
                            block);
      state.addMemrefToDeallocate(li, block);
      continue;
    }

    if (li.getParentRegion()->isProperAncestor(block->getParent())) {
      Value falseVal = buildBoolValue(builder, li.getLoc(), false);
      state.updateOwnership(li, falseVal, block);
    }
  }

  for (unsigned i = 0, e = block->getNumArguments(); i < e; ++i) {
    BlockArgument arg = block->getArgument(i);
    if (!isMemref(arg))
      continue;

    // Adhere to function boundary ABI: no ownership of function argument
    // MemRefs is taken.
    if (isa<FunctionOpInterface>(block->getParentOp()) &&
        block->isEntryBlock()) {
      Value newArg = buildBoolValue(builder, arg.getLoc(), false);
      state.updateOwnership(arg, newArg);
      state.addMemrefToDeallocate(arg, block);
      continue;
    }

    // Pass MemRef ownerships along via `i1` values.
    Value newArg = block->addArgument(builder.getI1Type(), arg.getLoc());
    state.updateOwnership(arg, newArg);
    state.addMemrefToDeallocate(arg, block);
  }

  // For each operation in the block, handle the interfaces that affect aliasing
  // and ownership of memrefs.
  for (Operation &op : llvm::make_early_inc_range(*block)) {
    FailureOr<Operation *> result = handleAllInterfaces(&op);
    if (failed(result))
      return failure();
    if (!*result)
      continue;

    populateRemainingOwnerships(*result);
  }

  // TODO: if block has no terminator, handle dealloc insertion here.
  return success();
}

Operation *BufferDeallocation::appendOpResults(Operation *op,
                                               ArrayRef<Type> types) {
  SmallVector<Type> newTypes(op->getResultTypes());
  newTypes.append(types.begin(), types.end());
  auto *newOp = Operation::create(op->getLoc(), op->getName(), newTypes,
                                  op->getOperands(), op->getAttrDictionary(),
                                  op->getPropertiesStorage(),
                                  op->getSuccessors(), op->getNumRegions());
  for (auto [oldRegion, newRegion] :
       llvm::zip(op->getRegions(), newOp->getRegions()))
    newRegion.takeBody(oldRegion);

  OpBuilder(op).insert(newOp);
  op->replaceAllUsesWith(newOp->getResults().take_front(op->getNumResults()));
  op->erase();

  return newOp;
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
  OpBuilder builder = OpBuilder::atBlockBegin(op->getBlock());

  // TODO: the RegionBranchOpInterface does not provide all the necessary
  // methods to perform this transformation without additional assumptions on
  // the structure. In particular, that
  // * additional values to be passed to the next region can be added to the end
  //   of the operand list, the end of the block argument list, and the end of
  //   the result value list. However, it seems to be the general guideline for
  //   operations implementing this interface to follow this structure.
  // * and that the block arguments and result values match the forwarded
  //   operands one-to-one (i.e., that there are no other values appended to the
  //   front).
  // These assumptions are satisfied by the `scf.if`, `scf.for`, and `scf.while`
  // operations.

  SmallVector<RegionSuccessor> regions;
  op.getSuccessorRegions(RegionBranchPoint::parent(), regions);
  assert(!regions.empty() && "Must have at least one successor region");
  SmallVector<Value> entryOperands(
      op.getEntrySuccessorOperands(regions.front()));
  unsigned numMemrefOperands = llvm::count_if(entryOperands, isMemref);

  // No ownership is acquired for any MemRefs that are passed to the region from
  // the outside.
  Value falseVal = buildBoolValue(builder, op.getLoc(), false);
  op->insertOperands(op->getNumOperands(),
                     SmallVector<Value>(numMemrefOperands, falseVal));

  int counter = op->getNumResults();
  unsigned numMemrefResults = llvm::count_if(op->getResults(), isMemref);
  SmallVector<Type> ownershipResults(numMemrefResults, builder.getI1Type());
  RegionBranchOpInterface newOp = appendOpResults(op, ownershipResults);

  for (auto result : llvm::make_filter_range(newOp->getResults(), isMemref)) {
    state.updateOwnership(result, newOp->getResult(counter++));
    state.addMemrefToDeallocate(result, newOp->getBlock());
  }

  return newOp.getOperation();
}

Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
    OpBuilder &builder, Value memref, Block *block) {
  // First, make sure we at least have 'Unique' ownership already.
  std::pair<Value, Value> newMemrefAndOwnership =
      materializeUniqueOwnership(builder, memref, block);
  Value newMemref = newMemrefAndOwnership.first;
  Value condition = newMemrefAndOwnership.second;

  // Avoid inserting additional IR if ownership is already guaranteed. In
  // particular, this is already the case when we had 'Unknown' ownership
  // initially and a clone was inserted to get to 'Unique' ownership.
  if (matchPattern(condition, m_One()))
    return newMemref;

  // Insert a runtime check and only clone if we still don't have ownership at
  // runtime.
  Value maybeClone =
      builder
          .create<scf::IfOp>(
              memref.getLoc(), condition,
              [&](OpBuilder &builder, Location loc) {
                builder.create<scf::YieldOp>(loc, newMemref);
              },
              [&](OpBuilder &builder, Location loc) {
                Value clone =
                    builder.create<bufferization::CloneOp>(loc, newMemref);
                builder.create<scf::YieldOp>(loc, clone);
              })
          .getResult(0);
  Value trueVal = buildBoolValue(builder, memref.getLoc(), true);
  state.updateOwnership(maybeClone, trueVal);
  state.addMemrefToDeallocate(maybeClone, maybeClone.getParentBlock());
  return maybeClone;
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(BranchOpInterface op) {
  if (op->getNumSuccessors() > 1)
    return op->emitError("BranchOpInterface operations with multiple "
                         "successors are not supported yet");

  if (op->getNumSuccessors() != 1)
    return emitError(op.getLoc(),
                     "only BranchOpInterface operations with exactly "
                     "one successor are supported yet");

  if (op.getSuccessorOperands(0).getProducedOperandCount() > 0)
    return op.emitError("produced operands are not supported");

  // Collect the values to deallocate and retain and use them to create the
  // dealloc operation.
  Block *block = op->getBlock();
  OpBuilder builder(op);
  SmallVector<Value> memrefs, conditions, toRetain;
  if (failed(state.getMemrefsAndConditionsToDeallocate(
          builder, op.getLoc(), block, memrefs, conditions)))
    return failure();

  OperandRange forwardedOperands =
      op.getSuccessorOperands(0).getForwardedOperands();
  state.getMemrefsToRetain(block, op->getSuccessor(0), forwardedOperands,
                           toRetain);

  auto deallocOp = builder.create<bufferization::DeallocOp>(
      op.getLoc(), memrefs, conditions, toRetain);

  // We want to replace the current ownership of the retained values with the
  // result values of the dealloc operation as they are always unique.
  state.resetOwnerships(deallocOp.getRetained(), block);
  for (auto [retained, ownership] :
       llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
    state.updateOwnership(retained, ownership, block);
  }

  unsigned numAdditionalReturns = llvm::count_if(forwardedOperands, isMemref);
  SmallVector<Value> newOperands(forwardedOperands);
  auto additionalConditions =
      deallocOp.getUpdatedConditions().take_front(numAdditionalReturns);
  newOperands.append(additionalConditions.begin(), additionalConditions.end());
  op.getSuccessorOperands(0).getMutableForwardedOperands().assign(newOperands);

  return op.getOperation();
}

FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
  OpBuilder builder(op);

  // Lookup the function operation and check if it has private visibility. If
  // the function is referenced by SSA value instead of a Symbol, it's assumed
  // to be public. (And we cannot easily change the type of the SSA value
  // anyway.)
  Operation *funcOp = op.resolveCallableInTable(state.getSymbolTable());
  bool isPrivate = false;
  if (auto symbol = dyn_cast_or_null<SymbolOpInterface>(funcOp))
    isPrivate = symbol.isPrivate() && !symbol.isDeclaration();

  // If the private-function-dynamic-ownership option is enabled and we are
  // calling a private function, we need to add an additional `i1` result for
  // each MemRef result to dynamically pass the current ownership indicator
  // rather than adhering to the function boundary ABI.
  if (options.privateFuncDynamicOwnership && isPrivate) {
    unsigned numMemrefs = llvm::count_if(op->getResults(), isMemref);
    SmallVector<Type> ownershipTypesToAppend(numMemrefs, builder.getI1Type());
    unsigned ownershipCounter = op->getNumResults();
    op = appendOpResults(op, ownershipTypesToAppend);

    for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
      state.updateOwnership(result, op->getResult(ownershipCounter++));
      state.addMemrefToDeallocate(result, result.getParentBlock());
    }

    return op.getOperation();
  }

  // According to the function boundary ABI we are guaranteed to get ownership
  // of all MemRefs returned by the function. Thus we set ownership to constant
  // 'true' and remember to deallocate them.
  Value trueVal = buildBoolValue(builder, op.getLoc(), true);
  for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
    state.updateOwnership(result, trueVal);
    state.addMemrefToDeallocate(result, result.getParentBlock());
  }

  return op.getOperation();
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
  auto *block = op->getBlock();
  OpBuilder builder = OpBuilder::atBlockBegin(block);

  for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
    if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
      // The bufferization.manual_deallocation attribute can be attached to ops
      // with an allocation and/or deallocation side effect. It indicates that
      // the op is under a "manual deallocation" scheme. Deallocation ops are
      // usually forbidden in the input IR (not supported by the buffer
      // deallocation pass). However, if they are under manual deallocation,
      // they can be safely ignored by the buffer deallocation pass.
      if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
        return op->emitError(
            "memory free side-effect on MemRef value not supported!");

      // Buffers that were allocated under "manual deallocation" may be
      // manually deallocated. We insert a runtime assertion to cover certain
      // cases of invalid IR where an automatically managed buffer allocation
      // is manually deallocated. This is not a bulletproof check!
      OpBuilder::InsertionGuard g(builder);
      builder.setInsertionPoint(op);
      Ownership ownership = state.getOwnership(operand, block);
      if (ownership.isUnique()) {
        Value ownershipInverted = builder.create<arith::XOrIOp>(
            op.getLoc(), ownership.getIndicator(),
            buildBoolValue(builder, op.getLoc(), true));
        builder.create<cf::AssertOp>(
            op.getLoc(), ownershipInverted,
            "expected that the block does not have ownership");
      }
    }
  }

  for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
    auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
    if (allocEffect.has_value()) {
      if (isa<SideEffects::AutomaticAllocationScopeResource>(
              allocEffect->getResource())) {
        // Make sure that the ownership of auto-managed allocations is set to
        // false. This is important for operations that have at least one memref
        // typed operand. E.g., consider an operation like `bufferization.clone`
        // that lowers to a `memref.alloca + memref.copy` instead of a
        // `memref.alloc`. If we didn't set the ownership of the result here,
        // the default ownership population in `populateRemainingOwnerships`
        // would assume aliasing with the MemRef operand.
        state.resetOwnerships(res, block);
        state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
        continue;
      }

      if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
        // This allocation will be deallocated manually. Assign an ownership of
        // "false", so that it will never be deallocated by the buffer
        // deallocation pass.
        state.resetOwnerships(res, block);
        state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
        continue;
      }

      state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
      state.addMemrefToDeallocate(res, block);
    }
  }

  return op.getOperation();
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
  OpBuilder builder(op);

  // If this is a return operation of a function that is not private, or if
  // dynamic function boundary ownership is disabled, we need to return memref
  // values for which we have guaranteed ownership in order to adhere to the
  // function boundary ABI.
  bool funcWithoutDynamicOwnership =
      isFunctionWithoutDynamicOwnership(op->getParentOp());
  if (funcWithoutDynamicOwnership) {
    for (OpOperand &val : op->getOpOperands()) {
      if (!isMemref(val.get()))
        continue;

      val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
                                                       op->getBlock()));
    }
  }

  // TODO: getSuccessorRegions is not implemented by all operations we care
  // about, but we would need to check how many successors there are and under
  // which condition they are taken, etc.

  MutableOperandRange operands =
      op.getMutableSuccessorOperands(RegionBranchPoint::parent());

  SmallVector<Value> updatedOwnerships;
  auto result = deallocation_impl::insertDeallocOpForReturnLike(
      state, op, operands.getAsOperandRange(), updatedOwnerships);
  if (failed(result) || !*result)
    return result;

  // Add an additional operand for every MemRef for the ownership indicator.
  if (!funcWithoutDynamicOwnership) {
    SmallVector<Value> newOperands{operands.getAsOperandRange()};
    newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
    operands.assign(newOperands);
  }

  return op.getOperation();
}

bool BufferDeallocation::isFunctionWithoutDynamicOwnership(Operation *op) {
  auto funcOp = dyn_cast<FunctionOpInterface>(op);
  return funcOp && (!options.privateFuncDynamicOwnership ||
                    !funcOp.isPrivate() || funcOp.isExternal());
}

void BufferDeallocation::populateRemainingOwnerships(Operation *op) {
  for (auto res : op->getResults()) {
    if (!isMemref(res))
      continue;
    if (!state.getOwnership(res, op->getBlock()).isUninitialized())
      continue;

    // The op does not allocate memory; otherwise, it would have been assigned
    // an ownership during `handleInterface`. Assume the result may alias with
    // any memref operand and thus combine all their ownerships.
    for (auto operand : op->getOperands()) {
      if (!isMemref(operand))
        continue;

      state.updateOwnership(
          res, state.getOwnership(operand, operand.getParentBlock()),
          op->getBlock());
    }

    // If the ownership value is still uninitialized (e.g., because the op has
    // no memref operands), assume that no ownership is taken. E.g., this is the
    // case for "memref.get_global".
    //
    // Note: This can lead to memory leaks if memory side effects are not
    // properly specified on the op.
    if (state.getOwnership(res, op->getBlock()).isUninitialized()) {
      OpBuilder builder(op);
      state.updateOwnership(res, buildBoolValue(builder, op->getLoc(), false));
    }
  }
}

//===----------------------------------------------------------------------===//
// OwnershipBasedBufferDeallocationPass
//===----------------------------------------------------------------------===//

namespace {

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
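///
/// An illustrative invocation (assuming the standard `mlir-opt` driver; the
/// pass name is derived from the tablegen definition included at the top of
/// this file):
/// ```
/// mlir-opt --ownership-based-buffer-deallocation input.mlir
/// ```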
struct OwnershipBasedBufferDeallocationPass
    : public bufferization::impl::OwnershipBasedBufferDeallocationBase<
          OwnershipBasedBufferDeallocationPass> {
  OwnershipBasedBufferDeallocationPass() = default;
  OwnershipBasedBufferDeallocationPass(DeallocationOptions options)
      : OwnershipBasedBufferDeallocationPass() {
    this->privateFuncDynamicOwnership.setValue(
        options.privateFuncDynamicOwnership);
  }
  void runOnOperation() override {
    DeallocationOptions options;
    options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;

    auto status = getOperation()->walk([&](func::FuncOp func) {
      if (func.isExternal())
        return WalkResult::skip();

      if (failed(deallocateBuffersOwnershipBased(func, options)))
        return WalkResult::interrupt();

      return WalkResult::advance();
    });
    if (status.wasInterrupted())
      signalPassFailure();
  }
};

} // namespace

//===----------------------------------------------------------------------===//
// Implement bufferization API
//===----------------------------------------------------------------------===//

LogicalResult
bufferization::deallocateBuffersOwnershipBased(FunctionOpInterface op,
                                               DeallocationOptions options) {
  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op, options);

  // Place all required temporary clone and dealloc nodes.
  return deallocation.deallocate(op);
}

//===----------------------------------------------------------------------===//
// OwnershipBasedBufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass>
mlir::bufferization::createOwnershipBasedBufferDeallocationPass(
    DeallocationOptions options) {
  return std::make_unique<OwnershipBasedBufferDeallocationPass>(options);
}
1073