xref: /llvm-project/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp (revision 65341b09b0d54ce5318e26a63b84138695d2ac35)
1 //===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for computing correct alloc and dealloc positions.
10 // Furthermore, buffer deallocation also adds required new clone operations to
11 // ensure that all buffers are deallocated. The main class is the
12 // BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// crucial to place them into the correct blocks. However, the
15 // liveness analysis does not pay attention to aliases, which can occur due to
16 // branches (and their associated block arguments) in general. For this purpose,
17 // BufferDeallocation firstly finds all possible aliases for a single value
18 // (using the BufferViewFlowAnalysis class). Consider the following example:
19 //
20 // ^bb0(%arg0):
21 //   cf.cond_br %cond, ^bb1, ^bb2
22 // ^bb1:
23 //   cf.br ^exit(%arg0)
24 // ^bb2:
25 //   %new_value = ...
26 //   cf.br ^exit(%new_value)
27 // ^exit(%arg1):
28 //   return %arg1;
29 //
30 // We should place the dealloc for %new_value in exit. However, we have to free
31 // the buffer in the same block, because it cannot be freed in the post
32 // dominator. However, this requires a new clone buffer for %arg1 that will
33 // contain the actual contents. Using the class BufferViewFlowAnalysis, we
34 // will find out that %new_value has a potential alias %arg1. In order to find
35 // the dealloc position we have to find all potential aliases, iterate over
36 // their uses and find the common post-dominator block (note that additional
37 // clones and buffers remove potential aliases and will influence the placement
38 // of the deallocs). In all cases, the computed block can be safely used to free
39 // the %new_value buffer (may be exit or bb2) as it will die and we can use
40 // liveness information to determine the exact operation after which we have to
// insert the dealloc. In general, the algorithm supports introducing clone
// buffers and placing deallocs in safe locations to ensure that all buffers
// will be freed in the end.
44 //
45 // TODO:
46 // The current implementation does not support explicit-control-flow loops and
47 // the resulting code will be invalid with respect to program semantics.
48 // However, structured control-flow loops are fully supported. Furthermore, it
49 // doesn't accept functions which return buffers already.
50 //
51 //===----------------------------------------------------------------------===//
52 
53 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
54 
55 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
56 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
57 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
58 #include "mlir/Dialect/Func/IR/FuncOps.h"
59 #include "mlir/Dialect/MemRef/IR/MemRef.h"
60 #include "llvm/ADT/SetOperations.h"
61 
62 namespace mlir {
63 namespace bufferization {
64 #define GEN_PASS_DEF_BUFFERDEALLOCATION
65 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
66 } // namespace bufferization
67 } // namespace mlir
68 
69 using namespace mlir;
70 using namespace mlir::bufferization;
71 
72 /// Walks over all immediate return-like terminators in the given region.
walkReturnOperations(Region * region,llvm::function_ref<LogicalResult (RegionBranchTerminatorOpInterface)> func)73 static LogicalResult walkReturnOperations(
74     Region *region,
75     llvm::function_ref<LogicalResult(RegionBranchTerminatorOpInterface)> func) {
76   for (Block &block : *region) {
77     Operation *terminator = block.getTerminator();
78     // Skip non region-return-like terminators.
79     if (auto regionTerminator =
80             dyn_cast<RegionBranchTerminatorOpInterface>(terminator)) {
81       if (failed(func(regionTerminator)))
82         return failure();
83     }
84   }
85   return success();
86 }
87 
88 /// Checks if all operations that have at least one attached region implement
89 /// the RegionBranchOpInterface. This is not required in edge cases, where we
90 /// have a single attached region and the parent operation has no results.
validateSupportedControlFlow(Operation * op)91 static bool validateSupportedControlFlow(Operation *op) {
92   WalkResult result = op->walk([&](Operation *operation) {
93     // Only check ops that are inside a function.
94     if (!operation->getParentOfType<func::FuncOp>())
95       return WalkResult::advance();
96 
97     auto regions = operation->getRegions();
98     // Walk over all operations in a region and check if the operation has at
99     // least one region and implements the RegionBranchOpInterface. If there
100     // is an operation that does not fulfill this condition, we cannot apply
101     // the deallocation steps. Furthermore, we accept cases, where we have a
102     // region that returns no results, since, in that case, the intra-region
103     // control flow does not affect the transformation.
104     size_t size = regions.size();
105     if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
106         !dyn_cast<RegionBranchOpInterface>(operation)) {
107       operation->emitError("All operations with attached regions need to "
108                            "implement the RegionBranchOpInterface.");
109     }
110 
111     return WalkResult::advance();
112   });
113   return !result.wasSkipped();
114 }
115 
116 namespace {
117 
118 //===----------------------------------------------------------------------===//
119 // Backedges analysis
120 //===----------------------------------------------------------------------===//
121 
122 /// A straight-forward program analysis which detects loop backedges induced by
123 /// explicit control flow.
124 class Backedges {
125 public:
126   using BlockSetT = SmallPtrSet<Block *, 16>;
127   using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
128 
129 public:
130   /// Constructs a new backedges analysis using the op provided.
Backedges(Operation * op)131   Backedges(Operation *op) { recurse(op); }
132 
133   /// Returns the number of backedges formed by explicit control flow.
size() const134   size_t size() const { return edgeSet.size(); }
135 
136   /// Returns the start iterator to loop over all backedges.
begin() const137   BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
138 
139   /// Returns the end iterator to loop over all backedges.
end() const140   BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
141 
142 private:
143   /// Enters the current block and inserts a backedge into the `edgeSet` if we
144   /// have already visited the current block. The inserted edge links the given
145   /// `predecessor` with the `current` block.
enter(Block & current,Block * predecessor)146   bool enter(Block &current, Block *predecessor) {
147     bool inserted = visited.insert(&current).second;
148     if (!inserted)
149       edgeSet.insert(std::make_pair(predecessor, &current));
150     return inserted;
151   }
152 
153   /// Leaves the current block.
exit(Block & current)154   void exit(Block &current) { visited.erase(&current); }
155 
156   /// Recurses into the given operation while taking all attached regions into
157   /// account.
recurse(Operation * op)158   void recurse(Operation *op) {
159     Block *current = op->getBlock();
160     // If the current op implements the `BranchOpInterface`, there can be
161     // cycles in the scope of all successor blocks.
162     if (isa<BranchOpInterface>(op)) {
163       for (Block *succ : current->getSuccessors())
164         recurse(*succ, current);
165     }
166     // Recurse into all distinct regions and check for explicit control-flow
167     // loops.
168     for (Region &region : op->getRegions()) {
169       if (!region.empty())
170         recurse(region.front(), current);
171     }
172   }
173 
174   /// Recurses into explicit control-flow structures that are given by
175   /// the successor relation defined on the block level.
recurse(Block & block,Block * predecessor)176   void recurse(Block &block, Block *predecessor) {
177     // Try to enter the current block. If this is not possible, we are
178     // currently processing this block and can safely return here.
179     if (!enter(block, predecessor))
180       return;
181 
182     // Recurse into all operations and successor blocks.
183     for (Operation &op : block.getOperations())
184       recurse(&op);
185 
186     // Leave the current block.
187     exit(block);
188   }
189 
190   /// Stores all blocks that are currently visited and on the processing stack.
191   BlockSetT visited;
192 
193   /// Stores all backedges in the format (source, target).
194   BackedgeSetT edgeSet;
195 };
196 
197 //===----------------------------------------------------------------------===//
198 // BufferDeallocation
199 //===----------------------------------------------------------------------===//
200 
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that in turn leads to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  /// Maps a buffer value (an allocation or one of its aliases) to the
  /// AllocationOpInterface implementation of its underlying allocation.
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  /// Initializes the base analysis (aliases, allocs, liveness) as well as
  /// dominance and post-dominance information for the given operation.
  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation of
      // the AllocationOpInterface, we cannot apply the BufferDeallocation pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      for (Value alias : aliases.resolve(alloc)) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      }
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SetVector<Value> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        // Values already scheduled for freeing need not be revisited.
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && isa<BlockArgument>(value))) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          // Safe alias: keep propagating from the same defining block, but
          // only once per (value, block) pair to guarantee termination.
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(isa<BlockArgument>(value)
                     ? introduceBlockArgCopy(cast<BlockArgument>(value))
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not have
      // to assign a block here since we do not want to move the allocation node
      // to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      SuccessorOperands operands =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex());

      // Query the associated source value.
      Value sourceValue = operands[blockArg.getArgNumber()];
      if (!sourceValue) {
        // No forwarded operand for this argument position; the copy cannot
        // be introduced.
        return failure();
      }
      // Wire new clone and successor operand.
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      operands.slice(blockArg.getArgNumber(), 1).assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*point=*/RegionBranchPoint::parent(),
                                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands = regionInterface.getEntrySuccessorOperands(argRegion);
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region, successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(
              &region, [&](RegionBranchTerminatorOpInterface terminator) {
                // Get the actual mutable operands for this terminator op.
                auto terminatorOperands =
                    terminator.getMutableSuccessorOperands(*regionSuccessor);
                // Extract the source value from the current terminator.
                // This conversion needs to exist on a separate line due to a
                // bug in GCC conversion analysis.
                OperandRange immutableTerminatorOperands = terminatorOperands;
                Value sourceValue = immutableTerminatorOperands[operandIndex];
                // Create a new clone at the current location of the terminator.
                auto clone = introduceCloneBuffers(sourceValue, terminator);
                if (failed(clone))
                  return failure();
                // Wire clone and terminator operand.
                terminatorOperands.slice(operandIndex, 1).assign(*clone);
                return success();
              })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note: that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferDeallocation assumes that a buffer will be only cloned once into a
    // temporary buffer. Hence, the construction of clone chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation of
        // the current alias to avoid invalid placement of deallocs for aliases
        // without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          // The alias is defined outside of the placement block; skip it.
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely store
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the Dealloc position is at the terminator operation of the
        // block, then the value should escape from a deallocation.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};
623 
624 //===----------------------------------------------------------------------===//
625 // BufferDeallocationPass
626 //===----------------------------------------------------------------------===//
627 
628 /// The actual buffer deallocation pass that inserts and moves dealloc nodes
629 /// into the right positions. Furthermore, it inserts additional clones if
630 /// necessary. It uses the algorithm described at the top of the file.
631 struct BufferDeallocationPass
632     : public bufferization::impl::BufferDeallocationBase<
633           BufferDeallocationPass> {
getDependentDialects__anon0f1aa16f0211::BufferDeallocationPass634   void getDependentDialects(DialectRegistry &registry) const override {
635     registry.insert<bufferization::BufferizationDialect>();
636     registry.insert<memref::MemRefDialect>();
637   }
638 
runOnOperation__anon0f1aa16f0211::BufferDeallocationPass639   void runOnOperation() override {
640     func::FuncOp func = getOperation();
641     if (func.isExternal())
642       return;
643 
644     if (failed(deallocateBuffers(func)))
645       signalPassFailure();
646   }
647 };
648 
649 } // namespace
650 
deallocateBuffers(Operation * op)651 LogicalResult bufferization::deallocateBuffers(Operation *op) {
652   if (isa<ModuleOp>(op)) {
653     WalkResult result = op->walk([&](func::FuncOp funcOp) {
654       if (failed(deallocateBuffers(funcOp)))
655         return WalkResult::interrupt();
656       return WalkResult::advance();
657     });
658     return success(!result.wasInterrupted());
659   }
660 
661   // Ensure that there are supported loops only.
662   Backedges backedges(op);
663   if (backedges.size()) {
664     op->emitError("Only structured control-flow loops are supported.");
665     return failure();
666   }
667 
668   // Check that the control flow structures are supported.
669   if (!validateSupportedControlFlow(op))
670     return failure();
671 
672   // Gather all required allocation nodes and prepare the deallocation phase.
673   BufferDeallocation deallocation(op);
674 
675   // Check for supported AllocationOpInterface implementations and prepare the
676   // internal deallocation pass.
677   if (failed(deallocation.prepare()))
678     return failure();
679 
680   // Place all required temporary clone and dealloc nodes.
681   if (failed(deallocation.deallocate()))
682     return failure();
683 
684   return success();
685 }
686 
687 //===----------------------------------------------------------------------===//
688 // BufferDeallocationPass construction
689 //===----------------------------------------------------------------------===//
690 
createBufferDeallocationPass()691 std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
692   return std::make_unique<BufferDeallocationPass>();
693 }
694