xref: /llvm-project/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp (revision 10ae8ae8375d6b69064204338a33500917749da9)
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
15 
16 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
17 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
18 #include "mlir/Dialect/Func/IR/FuncOps.h"
19 #include "mlir/Dialect/MemRef/IR/MemRef.h"
20 #include "mlir/IR/Operation.h"
21 #include "mlir/Interfaces/LoopLikeInterface.h"
22 #include "mlir/Pass/Pass.h"
23 
24 namespace mlir {
25 namespace bufferization {
26 #define GEN_PASS_DEF_BUFFERHOISTING
27 #define GEN_PASS_DEF_BUFFERLOOPHOISTING
28 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK
29 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
30 } // namespace bufferization
31 } // namespace mlir
32 
33 using namespace mlir;
34 using namespace mlir::bufferization;
35 
36 /// Returns true if the given operation implements a known high-level region-
37 /// based control-flow interface.
38 static bool isKnownControlFlowInterface(Operation *op) {
39   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
40 }
41 
42 /// Check if the size of the allocation is less than the given size. The
43 /// transformation is only applied to small buffers since large buffers could
44 /// exceed the stack space.
45 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
46                                 unsigned maxRankOfAllocatedMemRef) {
47   auto type = dyn_cast<ShapedType>(alloc.getType());
48   if (!type || !alloc.getDefiningOp<memref::AllocOp>())
49     return false;
50   if (!type.hasStaticShape()) {
51     // Check if the dynamic shape dimension of the alloc is produced by
52     // `memref.rank`. If this is the case, it is likely to be small.
53     // Furthermore, the dimension is limited to the maximum rank of the
54     // allocated memref to avoid large values by multiplying several small
55     // values.
56     if (type.getRank() <= maxRankOfAllocatedMemRef) {
57       return llvm::all_of(alloc.getDefiningOp()->getOperands(),
58                           [&](Value operand) {
59                             return operand.getDefiningOp<memref::RankOp>();
60                           });
61     }
62     return false;
63   }
64   unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
65                           .getTypeSizeInBits(type.getElementType());
66   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
67 }
68 
69 /// Checks whether the given aliases leave the allocation scope.
70 static bool
71 leavesAllocationScope(Region *parentRegion,
72                       const BufferViewFlowAnalysis::ValueSetT &aliases) {
73   for (Value alias : aliases) {
74     for (auto *use : alias.getUsers()) {
75       // If there is at least one alias that leaves the parent region, we know
76       // that this alias escapes the whole region and hence the associated
77       // allocation leaves allocation scope.
78       if (isa<RegionBranchTerminatorOpInterface>(use) &&
79           use->getParentRegion() == parentRegion)
80         return true;
81     }
82   }
83   return false;
84 }
85 
86 /// Checks, if an automated allocation scope for a given alloc value exists.
87 static bool hasAllocationScope(Value alloc,
88                                const BufferViewFlowAnalysis &aliasAnalysis) {
89   Region *region = alloc.getParentRegion();
90   do {
91     if (Operation *parentOp = region->getParentOp()) {
92       // Check if the operation is an automatic allocation scope and whether an
93       // alias leaves the scope. This means, an allocation yields out of
94       // this scope and can not be transformed in a stack-based allocation.
95       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
96           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
97         return true;
98       // Check if the operation is a known control flow interface and break the
99       // loop to avoid transformation in loops. Furthermore skip transformation
100       // if the operation does not implement a RegionBeanchOpInterface.
101       if (BufferPlacementTransformationBase::isLoop(parentOp) ||
102           !isKnownControlFlowInterface(parentOp))
103         break;
104     }
105   } while ((region = region->getParentRegion()));
106   return false;
107 }
108 
109 namespace {
110 
111 //===----------------------------------------------------------------------===//
112 // BufferAllocationHoisting
113 //===----------------------------------------------------------------------===//
114 
115 /// A base implementation compatible with the `BufferAllocationHoisting` class.
116 struct BufferAllocationHoistingStateBase {
117   /// A pointer to the current dominance info.
118   DominanceInfo *dominators;
119 
120   /// The current allocation value.
121   Value allocValue;
122 
123   /// The current placement block (if any).
124   Block *placementBlock;
125 
126   /// Initializes the state base.
127   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
128                                     Block *placementBlock)
129       : dominators(dominators), allocValue(allocValue),
130         placementBlock(placementBlock) {}
131 };
132 
133 /// Implements the actual hoisting logic for allocation nodes.
134 template <typename StateT>
135 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
136 public:
137   BufferAllocationHoisting(Operation *op)
138       : BufferPlacementTransformationBase(op), dominators(op),
139         postDominators(op), scopeOp(op) {}
140 
141   /// Moves allocations upwards.
142   void hoist() {
143     SmallVector<Value> allocsAndAllocas;
144     for (BufferPlacementAllocs::AllocEntry &entry : allocs)
145       allocsAndAllocas.push_back(std::get<0>(entry));
146     scopeOp->walk([&](memref::AllocaOp op) {
147       allocsAndAllocas.push_back(op.getMemref());
148     });
149 
150     for (auto allocValue : allocsAndAllocas) {
151       if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
152         continue;
153       Operation *definingOp = allocValue.getDefiningOp();
154       assert(definingOp && "No defining op");
155       auto operands = definingOp->getOperands();
156       auto resultAliases = aliases.resolve(allocValue);
157       // Determine the common dominator block of all aliases.
158       Block *dominatorBlock =
159           findCommonDominator(allocValue, resultAliases, dominators);
160       // Init the initial hoisting state.
161       StateT state(&dominators, allocValue, allocValue.getParentBlock());
162       // Check for additional allocation dependencies to compute an upper bound
163       // for hoisting.
164       Block *dependencyBlock = nullptr;
165       // If this node has dependencies, check all dependent nodes. This ensures
166       // that all dependency values have been computed before allocating the
167       // buffer.
168       for (Value depValue : operands) {
169         Block *depBlock = depValue.getParentBlock();
170         if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
171           dependencyBlock = depBlock;
172       }
173 
174       // Find the actual placement block and determine the start operation using
175       // an upper placement-block boundary. The idea is that placement block
176       // cannot be moved any further upwards than the given upper bound.
177       Block *placementBlock = findPlacementBlock(
178           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
179       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
180           allocValue, placementBlock, liveness);
181 
182       // Move the alloc in front of the start operation.
183       Operation *allocOperation = allocValue.getDefiningOp();
184       allocOperation->moveBefore(startOperation);
185     }
186   }
187 
188 private:
189   /// Finds a valid placement block by walking upwards in the CFG until we
190   /// either cannot continue our walk due to constraints (given by the StateT
191   /// implementation) or we have reached the upper-most dominator block.
192   Block *findPlacementBlock(StateT &state, Block *upperBound) {
193     Block *currentBlock = state.placementBlock;
194     // Walk from the innermost regions/loops to the outermost regions/loops and
195     // find an appropriate placement block that satisfies the constraint of the
196     // current StateT implementation. Walk until we reach the upperBound block
197     // (if any).
198 
199     // If we are not able to find a valid parent operation or an associated
200     // parent block, break the walk loop.
201     Operation *parentOp;
202     Block *parentBlock;
203     while ((parentOp = currentBlock->getParentOp()) &&
204            (parentBlock = parentOp->getBlock()) &&
205            (!upperBound ||
206             dominators.properlyDominates(upperBound, currentBlock))) {
207       // Try to find an immediate dominator and check whether the parent block
208       // is above the immediate dominator (if any).
209       DominanceInfoNode *idom = nullptr;
210 
211       // DominanceInfo doesn't support getNode queries for single-block regions.
212       if (!currentBlock->isEntryBlock())
213         idom = dominators.getNode(currentBlock)->getIDom();
214 
215       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
216         // If the current immediate dominator is below the placement block, move
217         // to the immediate dominator block.
218         currentBlock = idom->getBlock();
219         state.recordMoveToDominator(currentBlock);
220       } else {
221         // We have to move to our parent block since an immediate dominator does
222         // either not exist or is above our parent block. If we cannot move to
223         // our parent operation due to constraints given by the StateT
224         // implementation, break the walk loop. Furthermore, we should not move
225         // allocations out of unknown region-based control-flow operations.
226         if (!isKnownControlFlowInterface(parentOp) ||
227             !state.isLegalPlacement(parentOp))
228           break;
229         // Move to our parent block by notifying the current StateT
230         // implementation.
231         currentBlock = parentBlock;
232         state.recordMoveToParent(currentBlock);
233       }
234     }
235     // Return the finally determined placement block.
236     return state.placementBlock;
237   }
238 
239   /// The dominator info to find the appropriate start operation to move the
240   /// allocs.
241   DominanceInfo dominators;
242 
243   /// The post dominator info to move the dependent allocs in the right
244   /// position.
245   PostDominanceInfo postDominators;
246 
247   /// The map storing the final placement blocks of a given alloc value.
248   llvm::DenseMap<Value, Block *> placementBlocks;
249 
250   /// The operation that this transformation is working on. It is used to also
251   /// gather allocas.
252   Operation *scopeOp;
253 };
254 
255 /// A state implementation compatible with the `BufferAllocationHoisting` class
256 /// that hoists allocations into dominator blocks while keeping them inside of
257 /// loops.
258 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
259   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
260 
261   /// Computes the upper bound for the placement block search.
262   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
263     // If we do not have a dependency block, the upper bound is given by the
264     // dominator block.
265     if (!dependencyBlock)
266       return dominatorBlock;
267 
268     // Find the "lower" block of the dominator and the dependency block to
269     // ensure that we do not move allocations above this block.
270     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
271                ? dependencyBlock
272                : dominatorBlock;
273   }
274 
275   /// Returns true if the given operation does not represent a loop.
276   bool isLegalPlacement(Operation *op) {
277     return !BufferPlacementTransformationBase::isLoop(op);
278   }
279 
280   /// Returns true if the given operation should be considered for hoisting.
281   static bool shouldHoistOpType(Operation *op) {
282     return llvm::isa<memref::AllocOp>(op);
283   }
284 
285   /// Sets the current placement block to the given block.
286   void recordMoveToDominator(Block *block) { placementBlock = block; }
287 
288   /// Sets the current placement block to the given block.
289   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
290 };
291 
292 /// A state implementation compatible with the `BufferAllocationHoisting` class
293 /// that hoists allocations out of loops.
294 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
295   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
296 
297   /// Remembers the dominator block of all aliases.
298   Block *aliasDominatorBlock = nullptr;
299 
300   /// Computes the upper bound for the placement block search.
301   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
302     aliasDominatorBlock = dominatorBlock;
303     // If there is a dependency block, we have to use this block as an upper
304     // bound to satisfy all allocation value dependencies.
305     return dependencyBlock ? dependencyBlock : nullptr;
306   }
307 
308   /// Returns true if the given operation represents a loop and one of the
309   /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
310   /// given loop operation. If this is the case, it indicates that the
311   /// allocation is passed via a back edge.
312   bool isLegalPlacement(Operation *op) {
313     return BufferPlacementTransformationBase::isLoop(op) &&
314            !dominators->dominates(aliasDominatorBlock, op->getBlock());
315   }
316 
317   /// Returns true if the given operation should be considered for hoisting.
318   static bool shouldHoistOpType(Operation *op) {
319     return llvm::isa<memref::AllocOp, memref::AllocaOp>(op);
320   }
321 
322   /// Does not change the internal placement block, as we want to move
323   /// operations out of loops only.
324   void recordMoveToDominator(Block *block) {}
325 
326   /// Sets the current placement block to the given block.
327   void recordMoveToParent(Block *block) { placementBlock = block; }
328 };
329 
330 //===----------------------------------------------------------------------===//
331 // BufferPlacementPromotion
332 //===----------------------------------------------------------------------===//
333 
334 /// Promotes heap-based allocations to stack-based allocations (if possible).
335 class BufferPlacementPromotion : BufferPlacementTransformationBase {
336 public:
337   BufferPlacementPromotion(Operation *op)
338       : BufferPlacementTransformationBase(op) {}
339 
340   /// Promote buffers to stack-based allocations.
341   void promote(function_ref<bool(Value)> isSmallAlloc) {
342     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
343       Value alloc = std::get<0>(entry);
344       Operation *dealloc = std::get<1>(entry);
345       // Checking several requirements to transform an AllocOp into an AllocaOp.
346       // The transformation is done if the allocation is limited to a given
347       // size. Furthermore, a deallocation must not be defined for this
348       // allocation entry and a parent allocation scope must exist.
349       if (!isSmallAlloc(alloc) || dealloc ||
350           !hasAllocationScope(alloc, aliases))
351         continue;
352 
353       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
354           alloc, alloc.getParentBlock(), liveness);
355       // Build a new alloca that is associated with its parent
356       // `AutomaticAllocationScope` determined during the initialization phase.
357       OpBuilder builder(startOperation);
358       Operation *allocOp = alloc.getDefiningOp();
359       Operation *alloca = builder.create<memref::AllocaOp>(
360           alloc.getLoc(), cast<MemRefType>(alloc.getType()),
361           allocOp->getOperands(), allocOp->getAttrs());
362 
363       // Replace the original alloc by a newly created alloca.
364       allocOp->replaceAllUsesWith(alloca);
365       allocOp->erase();
366     }
367   }
368 };
369 
370 //===----------------------------------------------------------------------===//
371 // BufferOptimizationPasses
372 //===----------------------------------------------------------------------===//
373 
374 /// The buffer hoisting pass that hoists allocation nodes into dominating
375 /// blocks.
376 struct BufferHoistingPass
377     : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> {
378 
379   void runOnOperation() override {
380     // Hoist all allocations into dominator blocks.
381     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
382         getOperation());
383     optimizer.hoist();
384   }
385 };
386 
387 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
388 struct BufferLoopHoistingPass
389     : public bufferization::impl::BufferLoopHoistingBase<
390           BufferLoopHoistingPass> {
391 
392   void runOnOperation() override {
393     // Hoist all allocations out of loops.
394     hoistBuffersFromLoops(getOperation());
395   }
396 };
397 
398 /// The promote buffer to stack pass that tries to convert alloc nodes into
399 /// alloca nodes.
400 class PromoteBuffersToStackPass
401     : public bufferization::impl::PromoteBuffersToStackBase<
402           PromoteBuffersToStackPass> {
403 public:
404   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
405                             unsigned maxRankOfAllocatedMemRef) {
406     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
407     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
408   }
409 
410   explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
411       : isSmallAlloc(std::move(isSmallAlloc)) {}
412 
413   LogicalResult initialize(MLIRContext *context) override {
414     if (isSmallAlloc == nullptr) {
415       isSmallAlloc = [=](Value alloc) {
416         return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
417                                    maxRankOfAllocatedMemRef);
418       };
419     }
420     return success();
421   }
422 
423   void runOnOperation() override {
424     // Move all allocation nodes and convert candidates into allocas.
425     BufferPlacementPromotion optimizer(getOperation());
426     optimizer.promote(isSmallAlloc);
427   }
428 
429 private:
430   std::function<bool(Value)> isSmallAlloc;
431 };
432 
433 } // namespace
434 
435 void mlir::bufferization::hoistBuffersFromLoops(Operation *op) {
436   BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
437   optimizer.hoist();
438 }
439 
440 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
441   return std::make_unique<BufferHoistingPass>();
442 }
443 
444 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
445   return std::make_unique<BufferLoopHoistingPass>();
446 }
447 
448 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
449     unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
450   return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
451                                                      maxRankOfAllocatedMemRef);
452 }
453 
454 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
455     std::function<bool(Value)> isSmallAlloc) {
456   return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
457 }
458