xref: /llvm-project/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp (revision 0ad6ac8c5338e42192bc006576397a02b838d265)
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
15 
16 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
17 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
18 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
19 #include "mlir/Dialect/Func/IR/FuncOps.h"
20 #include "mlir/Dialect/MemRef/IR/MemRef.h"
21 #include "mlir/IR/Operation.h"
22 #include "mlir/Interfaces/LoopLikeInterface.h"
23 #include "mlir/Pass/Pass.h"
24 
25 namespace mlir {
26 namespace bufferization {
27 #define GEN_PASS_DEF_BUFFERHOISTING
28 #define GEN_PASS_DEF_BUFFERLOOPHOISTING
29 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK
30 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31 } // namespace bufferization
32 } // namespace mlir
33 
34 using namespace mlir;
35 using namespace mlir::bufferization;
36 
37 /// Returns true if the given operation implements a known high-level region-
38 /// based control-flow interface.
isKnownControlFlowInterface(Operation * op)39 static bool isKnownControlFlowInterface(Operation *op) {
40   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41 }
42 
43 /// Returns true if the given operation represents a loop by testing whether it
44 /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
45 /// the case of a `RegionBranchOpInterface`, it checks all region-based control-
46 /// flow edges for cycles.
isLoop(Operation * op)47 static bool isLoop(Operation *op) {
48   // If the operation implements the `LoopLikeOpInterface` it can be considered
49   // a loop.
50   if (isa<LoopLikeOpInterface>(op))
51     return true;
52 
53   // If the operation does not implement the `RegionBranchOpInterface`, it is
54   // (currently) not possible to detect a loop.
55   auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
56   if (!regionInterface)
57     return false;
58 
59   return regionInterface.hasLoop();
60 }
61 
62 /// Return whether the given operation is a loop with sequential execution
63 /// semantics.
isSequentialLoop(Operation * op)64 static bool isSequentialLoop(Operation *op) {
65   return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op);
66 }
67 
68 /// Returns true if the given operation implements the AllocationOpInterface
69 /// and it supports the dominate block hoisting.
allowAllocDominateBlockHoisting(Operation * op)70 static bool allowAllocDominateBlockHoisting(Operation *op) {
71   auto allocOp = dyn_cast<AllocationOpInterface>(op);
72   return allocOp &&
73          static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
74 }
75 
76 /// Returns true if the given operation implements the AllocationOpInterface
77 /// and it supports the loop hoisting.
allowAllocLoopHoisting(Operation * op)78 static bool allowAllocLoopHoisting(Operation *op) {
79   auto allocOp = dyn_cast<AllocationOpInterface>(op);
80   return allocOp &&
81          static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
82 }
83 
84 /// Check if the size of the allocation is less than the given size. The
85 /// transformation is only applied to small buffers since large buffers could
86 /// exceed the stack space.
defaultIsSmallAlloc(Value alloc,unsigned maximumSizeInBytes,unsigned maxRankOfAllocatedMemRef)87 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
88                                 unsigned maxRankOfAllocatedMemRef) {
89   auto type = dyn_cast<ShapedType>(alloc.getType());
90   if (!type || !alloc.getDefiningOp<memref::AllocOp>())
91     return false;
92   if (!type.hasStaticShape()) {
93     // Check if the dynamic shape dimension of the alloc is produced by
94     // `memref.rank`. If this is the case, it is likely to be small.
95     // Furthermore, the dimension is limited to the maximum rank of the
96     // allocated memref to avoid large values by multiplying several small
97     // values.
98     if (type.getRank() <= maxRankOfAllocatedMemRef) {
99       return llvm::all_of(alloc.getDefiningOp()->getOperands(),
100                           [&](Value operand) {
101                             return operand.getDefiningOp<memref::RankOp>();
102                           });
103     }
104     return false;
105   }
106   unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
107                           .getTypeSizeInBits(type.getElementType());
108   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
109 }
110 
111 /// Checks whether the given aliases leave the allocation scope.
112 static bool
leavesAllocationScope(Region * parentRegion,const BufferViewFlowAnalysis::ValueSetT & aliases)113 leavesAllocationScope(Region *parentRegion,
114                       const BufferViewFlowAnalysis::ValueSetT &aliases) {
115   for (Value alias : aliases) {
116     for (auto *use : alias.getUsers()) {
117       // If there is at least one alias that leaves the parent region, we know
118       // that this alias escapes the whole region and hence the associated
119       // allocation leaves allocation scope.
120       if (isa<RegionBranchTerminatorOpInterface>(use) &&
121           use->getParentRegion() == parentRegion)
122         return true;
123     }
124   }
125   return false;
126 }
127 
128 /// Checks, if an automated allocation scope for a given alloc value exists.
hasAllocationScope(Value alloc,const BufferViewFlowAnalysis & aliasAnalysis)129 static bool hasAllocationScope(Value alloc,
130                                const BufferViewFlowAnalysis &aliasAnalysis) {
131   Region *region = alloc.getParentRegion();
132   do {
133     if (Operation *parentOp = region->getParentOp()) {
134       // Check if the operation is an automatic allocation scope and whether an
135       // alias leaves the scope. This means, an allocation yields out of
136       // this scope and can not be transformed in a stack-based allocation.
137       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
138           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
139         return true;
140       // Check if the operation is a known control flow interface and break the
141       // loop to avoid transformation in loops. Furthermore skip transformation
142       // if the operation does not implement a RegionBeanchOpInterface.
143       if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
144         break;
145     }
146   } while ((region = region->getParentRegion()));
147   return false;
148 }
149 
150 namespace {
151 
152 //===----------------------------------------------------------------------===//
153 // BufferAllocationHoisting
154 //===----------------------------------------------------------------------===//
155 
156 /// A base implementation compatible with the `BufferAllocationHoisting` class.
157 struct BufferAllocationHoistingStateBase {
158   /// A pointer to the current dominance info.
159   DominanceInfo *dominators;
160 
161   /// The current allocation value.
162   Value allocValue;
163 
164   /// The current placement block (if any).
165   Block *placementBlock;
166 
167   /// Initializes the state base.
BufferAllocationHoistingStateBase__anon5e63145a0211::BufferAllocationHoistingStateBase168   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
169                                     Block *placementBlock)
170       : dominators(dominators), allocValue(allocValue),
171         placementBlock(placementBlock) {}
172 };
173 
174 /// Implements the actual hoisting logic for allocation nodes.
175 template <typename StateT>
176 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
177 public:
BufferAllocationHoisting(Operation * op)178   BufferAllocationHoisting(Operation *op)
179       : BufferPlacementTransformationBase(op), dominators(op),
180         postDominators(op), scopeOp(op) {}
181 
182   /// Moves allocations upwards.
hoist()183   void hoist() {
184     SmallVector<Value> allocsAndAllocas;
185     for (BufferPlacementAllocs::AllocEntry &entry : allocs)
186       allocsAndAllocas.push_back(std::get<0>(entry));
187     scopeOp->walk([&](memref::AllocaOp op) {
188       allocsAndAllocas.push_back(op.getMemref());
189     });
190 
191     for (auto allocValue : allocsAndAllocas) {
192       if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
193         continue;
194       Operation *definingOp = allocValue.getDefiningOp();
195       assert(definingOp && "No defining op");
196       auto operands = definingOp->getOperands();
197       auto resultAliases = aliases.resolve(allocValue);
198       // Determine the common dominator block of all aliases.
199       Block *dominatorBlock =
200           findCommonDominator(allocValue, resultAliases, dominators);
201       // Init the initial hoisting state.
202       StateT state(&dominators, allocValue, allocValue.getParentBlock());
203       // Check for additional allocation dependencies to compute an upper bound
204       // for hoisting.
205       Block *dependencyBlock = nullptr;
206       // If this node has dependencies, check all dependent nodes. This ensures
207       // that all dependency values have been computed before allocating the
208       // buffer.
209       for (Value depValue : operands) {
210         Block *depBlock = depValue.getParentBlock();
211         if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
212           dependencyBlock = depBlock;
213       }
214 
215       // Find the actual placement block and determine the start operation using
216       // an upper placement-block boundary. The idea is that placement block
217       // cannot be moved any further upwards than the given upper bound.
218       Block *placementBlock = findPlacementBlock(
219           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
220       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
221           allocValue, placementBlock, liveness);
222 
223       // Move the alloc in front of the start operation.
224       Operation *allocOperation = allocValue.getDefiningOp();
225       allocOperation->moveBefore(startOperation);
226     }
227   }
228 
229 private:
230   /// Finds a valid placement block by walking upwards in the CFG until we
231   /// either cannot continue our walk due to constraints (given by the StateT
232   /// implementation) or we have reached the upper-most dominator block.
findPlacementBlock(StateT & state,Block * upperBound)233   Block *findPlacementBlock(StateT &state, Block *upperBound) {
234     Block *currentBlock = state.placementBlock;
235     // Walk from the innermost regions/loops to the outermost regions/loops and
236     // find an appropriate placement block that satisfies the constraint of the
237     // current StateT implementation. Walk until we reach the upperBound block
238     // (if any).
239 
240     // If we are not able to find a valid parent operation or an associated
241     // parent block, break the walk loop.
242     Operation *parentOp;
243     Block *parentBlock;
244     while ((parentOp = currentBlock->getParentOp()) &&
245            (parentBlock = parentOp->getBlock()) &&
246            (!upperBound ||
247             dominators.properlyDominates(upperBound, currentBlock))) {
248       // Try to find an immediate dominator and check whether the parent block
249       // is above the immediate dominator (if any).
250       DominanceInfoNode *idom = nullptr;
251 
252       // DominanceInfo doesn't support getNode queries for single-block regions.
253       if (!currentBlock->isEntryBlock())
254         idom = dominators.getNode(currentBlock)->getIDom();
255 
256       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
257         // If the current immediate dominator is below the placement block, move
258         // to the immediate dominator block.
259         currentBlock = idom->getBlock();
260         state.recordMoveToDominator(currentBlock);
261       } else {
262         // We have to move to our parent block since an immediate dominator does
263         // either not exist or is above our parent block. If we cannot move to
264         // our parent operation due to constraints given by the StateT
265         // implementation, break the walk loop. Furthermore, we should not move
266         // allocations out of unknown region-based control-flow operations.
267         if (!isKnownControlFlowInterface(parentOp) ||
268             !state.isLegalPlacement(parentOp))
269           break;
270         // Move to our parent block by notifying the current StateT
271         // implementation.
272         currentBlock = parentBlock;
273         state.recordMoveToParent(currentBlock);
274       }
275     }
276     // Return the finally determined placement block.
277     return state.placementBlock;
278   }
279 
280   /// The dominator info to find the appropriate start operation to move the
281   /// allocs.
282   DominanceInfo dominators;
283 
284   /// The post dominator info to move the dependent allocs in the right
285   /// position.
286   PostDominanceInfo postDominators;
287 
288   /// The map storing the final placement blocks of a given alloc value.
289   llvm::DenseMap<Value, Block *> placementBlocks;
290 
291   /// The operation that this transformation is working on. It is used to also
292   /// gather allocas.
293   Operation *scopeOp;
294 };
295 
296 /// A state implementation compatible with the `BufferAllocationHoisting` class
297 /// that hoists allocations into dominator blocks while keeping them inside of
298 /// loops.
299 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
300   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
301 
302   /// Computes the upper bound for the placement block search.
computeUpperBound__anon5e63145a0211::BufferAllocationHoistingState303   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
304     // If we do not have a dependency block, the upper bound is given by the
305     // dominator block.
306     if (!dependencyBlock)
307       return dominatorBlock;
308 
309     // Find the "lower" block of the dominator and the dependency block to
310     // ensure that we do not move allocations above this block.
311     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
312                ? dependencyBlock
313                : dominatorBlock;
314   }
315 
316   /// Returns true if the given operation does not represent a loop.
isLegalPlacement__anon5e63145a0211::BufferAllocationHoistingState317   bool isLegalPlacement(Operation *op) { return !isLoop(op); }
318 
319   /// Returns true if the given operation should be considered for hoisting.
shouldHoistOpType__anon5e63145a0211::BufferAllocationHoistingState320   static bool shouldHoistOpType(Operation *op) {
321     return allowAllocDominateBlockHoisting(op);
322   }
323 
324   /// Sets the current placement block to the given block.
recordMoveToDominator__anon5e63145a0211::BufferAllocationHoistingState325   void recordMoveToDominator(Block *block) { placementBlock = block; }
326 
327   /// Sets the current placement block to the given block.
recordMoveToParent__anon5e63145a0211::BufferAllocationHoistingState328   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
329 };
330 
331 /// A state implementation compatible with the `BufferAllocationHoisting` class
332 /// that hoists allocations out of loops.
333 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
334   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
335 
336   /// Remembers the dominator block of all aliases.
337   Block *aliasDominatorBlock = nullptr;
338 
339   /// Computes the upper bound for the placement block search.
computeUpperBound__anon5e63145a0211::BufferAllocationLoopHoistingState340   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
341     aliasDominatorBlock = dominatorBlock;
342     // If there is a dependency block, we have to use this block as an upper
343     // bound to satisfy all allocation value dependencies.
344     return dependencyBlock ? dependencyBlock : nullptr;
345   }
346 
347   /// Returns true if the given operation represents a loop with sequential
348   /// execution semantics and one of the aliases caused the
349   /// `aliasDominatorBlock` to be "above" the block of the given loop operation.
350   /// If this is the case, it indicates that the allocation is passed via a back
351   /// edge.
isLegalPlacement__anon5e63145a0211::BufferAllocationLoopHoistingState352   bool isLegalPlacement(Operation *op) {
353     return isSequentialLoop(op) &&
354            !dominators->dominates(aliasDominatorBlock, op->getBlock());
355   }
356 
357   /// Returns true if the given operation should be considered for hoisting.
shouldHoistOpType__anon5e63145a0211::BufferAllocationLoopHoistingState358   static bool shouldHoistOpType(Operation *op) {
359     return allowAllocLoopHoisting(op);
360   }
361 
362   /// Does not change the internal placement block, as we want to move
363   /// operations out of loops only.
recordMoveToDominator__anon5e63145a0211::BufferAllocationLoopHoistingState364   void recordMoveToDominator(Block *block) {}
365 
366   /// Sets the current placement block to the given block.
recordMoveToParent__anon5e63145a0211::BufferAllocationLoopHoistingState367   void recordMoveToParent(Block *block) { placementBlock = block; }
368 };
369 
370 //===----------------------------------------------------------------------===//
371 // BufferPlacementPromotion
372 //===----------------------------------------------------------------------===//
373 
374 /// Promotes heap-based allocations to stack-based allocations (if possible).
375 class BufferPlacementPromotion : BufferPlacementTransformationBase {
376 public:
BufferPlacementPromotion(Operation * op)377   BufferPlacementPromotion(Operation *op)
378       : BufferPlacementTransformationBase(op) {}
379 
380   /// Promote buffers to stack-based allocations.
promote(function_ref<bool (Value)> isSmallAlloc)381   void promote(function_ref<bool(Value)> isSmallAlloc) {
382     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
383       Value alloc = std::get<0>(entry);
384       Operation *dealloc = std::get<1>(entry);
385       // Checking several requirements to transform an AllocOp into an AllocaOp.
386       // The transformation is done if the allocation is limited to a given
387       // size. Furthermore, a deallocation must not be defined for this
388       // allocation entry and a parent allocation scope must exist.
389       if (!isSmallAlloc(alloc) || dealloc ||
390           !hasAllocationScope(alloc, aliases))
391         continue;
392 
393       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
394           alloc, alloc.getParentBlock(), liveness);
395       // Build a new alloca that is associated with its parent
396       // `AutomaticAllocationScope` determined during the initialization phase.
397       OpBuilder builder(startOperation);
398       Operation *allocOp = alloc.getDefiningOp();
399       if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
400         std::optional<Operation *> alloca =
401             allocInterface.buildPromotedAlloc(builder, alloc);
402         if (!alloca)
403           continue;
404         // Replace the original alloc by a newly created alloca.
405         allocOp->replaceAllUsesWith(alloca.value());
406         allocOp->erase();
407       }
408     }
409   }
410 };
411 
412 //===----------------------------------------------------------------------===//
413 // BufferOptimizationPasses
414 //===----------------------------------------------------------------------===//
415 
416 /// The buffer hoisting pass that hoists allocation nodes into dominating
417 /// blocks.
418 struct BufferHoistingPass
419     : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> {
420 
runOnOperation__anon5e63145a0211::BufferHoistingPass421   void runOnOperation() override {
422     // Hoist all allocations into dominator blocks.
423     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
424         getOperation());
425     optimizer.hoist();
426   }
427 };
428 
429 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
430 struct BufferLoopHoistingPass
431     : public bufferization::impl::BufferLoopHoistingBase<
432           BufferLoopHoistingPass> {
433 
runOnOperation__anon5e63145a0211::BufferLoopHoistingPass434   void runOnOperation() override {
435     // Hoist all allocations out of loops.
436     hoistBuffersFromLoops(getOperation());
437   }
438 };
439 
440 /// The promote buffer to stack pass that tries to convert alloc nodes into
441 /// alloca nodes.
442 class PromoteBuffersToStackPass
443     : public bufferization::impl::PromoteBuffersToStackBase<
444           PromoteBuffersToStackPass> {
445 public:
PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,unsigned maxRankOfAllocatedMemRef)446   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
447                             unsigned maxRankOfAllocatedMemRef) {
448     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
449     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
450   }
451 
PromoteBuffersToStackPass(std::function<bool (Value)> isSmallAlloc)452   explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
453       : isSmallAlloc(std::move(isSmallAlloc)) {}
454 
initialize(MLIRContext * context)455   LogicalResult initialize(MLIRContext *context) override {
456     if (isSmallAlloc == nullptr) {
457       isSmallAlloc = [=](Value alloc) {
458         return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
459                                    maxRankOfAllocatedMemRef);
460       };
461     }
462     return success();
463   }
464 
runOnOperation()465   void runOnOperation() override {
466     // Move all allocation nodes and convert candidates into allocas.
467     BufferPlacementPromotion optimizer(getOperation());
468     optimizer.promote(isSmallAlloc);
469   }
470 
471 private:
472   std::function<bool(Value)> isSmallAlloc;
473 };
474 
475 } // namespace
476 
hoistBuffersFromLoops(Operation * op)477 void mlir::bufferization::hoistBuffersFromLoops(Operation *op) {
478   BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
479   optimizer.hoist();
480 }
481 
createBufferHoistingPass()482 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
483   return std::make_unique<BufferHoistingPass>();
484 }
485 
createBufferLoopHoistingPass()486 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
487   return std::make_unique<BufferLoopHoistingPass>();
488 }
489 
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,unsigned maxRankOfAllocatedMemRef)490 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
491     unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
492   return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
493                                                      maxRankOfAllocatedMemRef);
494 }
495 
createPromoteBuffersToStackPass(std::function<bool (Value)> isSmallAlloc)496 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
497     std::function<bool(Value)> isSmallAlloc) {
498   return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
499 }
500