xref: /llvm-project/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp (revision dd450f08cfeb9da372cbe459058bc9ae9425f862)
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
15 
16 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
17 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
18 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
19 #include "mlir/Dialect/Func/IR/FuncOps.h"
20 #include "mlir/Dialect/MemRef/IR/MemRef.h"
21 #include "mlir/IR/Operation.h"
22 #include "mlir/Interfaces/LoopLikeInterface.h"
23 #include "mlir/Pass/Pass.h"
24 
25 namespace mlir {
26 namespace bufferization {
27 #define GEN_PASS_DEF_BUFFERHOISTING
28 #define GEN_PASS_DEF_BUFFERLOOPHOISTING
29 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK
30 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31 } // namespace bufferization
32 } // namespace mlir
33 
34 using namespace mlir;
35 using namespace mlir::bufferization;
36 
37 /// Returns true if the given operation implements a known high-level region-
38 /// based control-flow interface.
39 static bool isKnownControlFlowInterface(Operation *op) {
40   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41 }
42 
43 /// Returns true if the given operation represents a loop by testing whether it
44 /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
45 /// the case of a `RegionBranchOpInterface`, it checks all region-based control-
46 /// flow edges for cycles.
47 static bool isLoop(Operation *op) {
48   // If the operation implements the `LoopLikeOpInterface` it can be considered
49   // a loop.
50   if (isa<LoopLikeOpInterface>(op))
51     return true;
52 
53   // If the operation does not implement the `RegionBranchOpInterface`, it is
54   // (currently) not possible to detect a loop.
55   auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
56   if (!regionInterface)
57     return false;
58 
59   return regionInterface.hasLoop();
60 }
61 
62 /// Returns true if the given operation implements the AllocationOpInterface
63 /// and it supports the dominate block hoisting.
64 static bool allowAllocDominateBlockHoisting(Operation *op) {
65   auto allocOp = dyn_cast<AllocationOpInterface>(op);
66   return allocOp &&
67          static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
68 }
69 
70 /// Returns true if the given operation implements the AllocationOpInterface
71 /// and it supports the loop hoisting.
72 static bool allowAllocLoopHoisting(Operation *op) {
73   auto allocOp = dyn_cast<AllocationOpInterface>(op);
74   return allocOp &&
75          static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
76 }
77 
78 /// Check if the size of the allocation is less than the given size. The
79 /// transformation is only applied to small buffers since large buffers could
80 /// exceed the stack space.
81 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
82                                 unsigned maxRankOfAllocatedMemRef) {
83   auto type = dyn_cast<ShapedType>(alloc.getType());
84   if (!type || !alloc.getDefiningOp<memref::AllocOp>())
85     return false;
86   if (!type.hasStaticShape()) {
87     // Check if the dynamic shape dimension of the alloc is produced by
88     // `memref.rank`. If this is the case, it is likely to be small.
89     // Furthermore, the dimension is limited to the maximum rank of the
90     // allocated memref to avoid large values by multiplying several small
91     // values.
92     if (type.getRank() <= maxRankOfAllocatedMemRef) {
93       return llvm::all_of(alloc.getDefiningOp()->getOperands(),
94                           [&](Value operand) {
95                             return operand.getDefiningOp<memref::RankOp>();
96                           });
97     }
98     return false;
99   }
100   unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
101                           .getTypeSizeInBits(type.getElementType());
102   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
103 }
104 
105 /// Checks whether the given aliases leave the allocation scope.
106 static bool
107 leavesAllocationScope(Region *parentRegion,
108                       const BufferViewFlowAnalysis::ValueSetT &aliases) {
109   for (Value alias : aliases) {
110     for (auto *use : alias.getUsers()) {
111       // If there is at least one alias that leaves the parent region, we know
112       // that this alias escapes the whole region and hence the associated
113       // allocation leaves allocation scope.
114       if (isa<RegionBranchTerminatorOpInterface>(use) &&
115           use->getParentRegion() == parentRegion)
116         return true;
117     }
118   }
119   return false;
120 }
121 
122 /// Checks, if an automated allocation scope for a given alloc value exists.
123 static bool hasAllocationScope(Value alloc,
124                                const BufferViewFlowAnalysis &aliasAnalysis) {
125   Region *region = alloc.getParentRegion();
126   do {
127     if (Operation *parentOp = region->getParentOp()) {
128       // Check if the operation is an automatic allocation scope and whether an
129       // alias leaves the scope. This means, an allocation yields out of
130       // this scope and can not be transformed in a stack-based allocation.
131       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
132           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
133         return true;
134       // Check if the operation is a known control flow interface and break the
135       // loop to avoid transformation in loops. Furthermore skip transformation
136       // if the operation does not implement a RegionBeanchOpInterface.
137       if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
138         break;
139     }
140   } while ((region = region->getParentRegion()));
141   return false;
142 }
143 
144 namespace {
145 
146 //===----------------------------------------------------------------------===//
147 // BufferAllocationHoisting
148 //===----------------------------------------------------------------------===//
149 
150 /// A base implementation compatible with the `BufferAllocationHoisting` class.
151 struct BufferAllocationHoistingStateBase {
152   /// A pointer to the current dominance info.
153   DominanceInfo *dominators;
154 
155   /// The current allocation value.
156   Value allocValue;
157 
158   /// The current placement block (if any).
159   Block *placementBlock;
160 
161   /// Initializes the state base.
162   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
163                                     Block *placementBlock)
164       : dominators(dominators), allocValue(allocValue),
165         placementBlock(placementBlock) {}
166 };
167 
168 /// Implements the actual hoisting logic for allocation nodes.
169 template <typename StateT>
170 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
171 public:
172   BufferAllocationHoisting(Operation *op)
173       : BufferPlacementTransformationBase(op), dominators(op),
174         postDominators(op), scopeOp(op) {}
175 
176   /// Moves allocations upwards.
177   void hoist() {
178     SmallVector<Value> allocsAndAllocas;
179     for (BufferPlacementAllocs::AllocEntry &entry : allocs)
180       allocsAndAllocas.push_back(std::get<0>(entry));
181     scopeOp->walk([&](memref::AllocaOp op) {
182       allocsAndAllocas.push_back(op.getMemref());
183     });
184 
185     for (auto allocValue : allocsAndAllocas) {
186       if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
187         continue;
188       Operation *definingOp = allocValue.getDefiningOp();
189       assert(definingOp && "No defining op");
190       auto operands = definingOp->getOperands();
191       auto resultAliases = aliases.resolve(allocValue);
192       // Determine the common dominator block of all aliases.
193       Block *dominatorBlock =
194           findCommonDominator(allocValue, resultAliases, dominators);
195       // Init the initial hoisting state.
196       StateT state(&dominators, allocValue, allocValue.getParentBlock());
197       // Check for additional allocation dependencies to compute an upper bound
198       // for hoisting.
199       Block *dependencyBlock = nullptr;
200       // If this node has dependencies, check all dependent nodes. This ensures
201       // that all dependency values have been computed before allocating the
202       // buffer.
203       for (Value depValue : operands) {
204         Block *depBlock = depValue.getParentBlock();
205         if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
206           dependencyBlock = depBlock;
207       }
208 
209       // Find the actual placement block and determine the start operation using
210       // an upper placement-block boundary. The idea is that placement block
211       // cannot be moved any further upwards than the given upper bound.
212       Block *placementBlock = findPlacementBlock(
213           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
214       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
215           allocValue, placementBlock, liveness);
216 
217       // Move the alloc in front of the start operation.
218       Operation *allocOperation = allocValue.getDefiningOp();
219       allocOperation->moveBefore(startOperation);
220     }
221   }
222 
223 private:
224   /// Finds a valid placement block by walking upwards in the CFG until we
225   /// either cannot continue our walk due to constraints (given by the StateT
226   /// implementation) or we have reached the upper-most dominator block.
227   Block *findPlacementBlock(StateT &state, Block *upperBound) {
228     Block *currentBlock = state.placementBlock;
229     // Walk from the innermost regions/loops to the outermost regions/loops and
230     // find an appropriate placement block that satisfies the constraint of the
231     // current StateT implementation. Walk until we reach the upperBound block
232     // (if any).
233 
234     // If we are not able to find a valid parent operation or an associated
235     // parent block, break the walk loop.
236     Operation *parentOp;
237     Block *parentBlock;
238     while ((parentOp = currentBlock->getParentOp()) &&
239            (parentBlock = parentOp->getBlock()) &&
240            (!upperBound ||
241             dominators.properlyDominates(upperBound, currentBlock))) {
242       // Try to find an immediate dominator and check whether the parent block
243       // is above the immediate dominator (if any).
244       DominanceInfoNode *idom = nullptr;
245 
246       // DominanceInfo doesn't support getNode queries for single-block regions.
247       if (!currentBlock->isEntryBlock())
248         idom = dominators.getNode(currentBlock)->getIDom();
249 
250       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
251         // If the current immediate dominator is below the placement block, move
252         // to the immediate dominator block.
253         currentBlock = idom->getBlock();
254         state.recordMoveToDominator(currentBlock);
255       } else {
256         // We have to move to our parent block since an immediate dominator does
257         // either not exist or is above our parent block. If we cannot move to
258         // our parent operation due to constraints given by the StateT
259         // implementation, break the walk loop. Furthermore, we should not move
260         // allocations out of unknown region-based control-flow operations.
261         if (!isKnownControlFlowInterface(parentOp) ||
262             !state.isLegalPlacement(parentOp))
263           break;
264         // Move to our parent block by notifying the current StateT
265         // implementation.
266         currentBlock = parentBlock;
267         state.recordMoveToParent(currentBlock);
268       }
269     }
270     // Return the finally determined placement block.
271     return state.placementBlock;
272   }
273 
274   /// The dominator info to find the appropriate start operation to move the
275   /// allocs.
276   DominanceInfo dominators;
277 
278   /// The post dominator info to move the dependent allocs in the right
279   /// position.
280   PostDominanceInfo postDominators;
281 
282   /// The map storing the final placement blocks of a given alloc value.
283   llvm::DenseMap<Value, Block *> placementBlocks;
284 
285   /// The operation that this transformation is working on. It is used to also
286   /// gather allocas.
287   Operation *scopeOp;
288 };
289 
290 /// A state implementation compatible with the `BufferAllocationHoisting` class
291 /// that hoists allocations into dominator blocks while keeping them inside of
292 /// loops.
293 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
294   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
295 
296   /// Computes the upper bound for the placement block search.
297   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
298     // If we do not have a dependency block, the upper bound is given by the
299     // dominator block.
300     if (!dependencyBlock)
301       return dominatorBlock;
302 
303     // Find the "lower" block of the dominator and the dependency block to
304     // ensure that we do not move allocations above this block.
305     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
306                ? dependencyBlock
307                : dominatorBlock;
308   }
309 
310   /// Returns true if the given operation does not represent a loop.
311   bool isLegalPlacement(Operation *op) { return !isLoop(op); }
312 
313   /// Returns true if the given operation should be considered for hoisting.
314   static bool shouldHoistOpType(Operation *op) {
315     return allowAllocDominateBlockHoisting(op);
316   }
317 
318   /// Sets the current placement block to the given block.
319   void recordMoveToDominator(Block *block) { placementBlock = block; }
320 
321   /// Sets the current placement block to the given block.
322   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
323 };
324 
325 /// A state implementation compatible with the `BufferAllocationHoisting` class
326 /// that hoists allocations out of loops.
327 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
328   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
329 
330   /// Remembers the dominator block of all aliases.
331   Block *aliasDominatorBlock = nullptr;
332 
333   /// Computes the upper bound for the placement block search.
334   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
335     aliasDominatorBlock = dominatorBlock;
336     // If there is a dependency block, we have to use this block as an upper
337     // bound to satisfy all allocation value dependencies.
338     return dependencyBlock ? dependencyBlock : nullptr;
339   }
340 
341   /// Returns true if the given operation represents a loop and one of the
342   /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
343   /// given loop operation. If this is the case, it indicates that the
344   /// allocation is passed via a back edge.
345   bool isLegalPlacement(Operation *op) {
346     return isLoop(op) &&
347            !dominators->dominates(aliasDominatorBlock, op->getBlock());
348   }
349 
350   /// Returns true if the given operation should be considered for hoisting.
351   static bool shouldHoistOpType(Operation *op) {
352     return allowAllocLoopHoisting(op);
353   }
354 
355   /// Does not change the internal placement block, as we want to move
356   /// operations out of loops only.
357   void recordMoveToDominator(Block *block) {}
358 
359   /// Sets the current placement block to the given block.
360   void recordMoveToParent(Block *block) { placementBlock = block; }
361 };
362 
363 //===----------------------------------------------------------------------===//
364 // BufferPlacementPromotion
365 //===----------------------------------------------------------------------===//
366 
367 /// Promotes heap-based allocations to stack-based allocations (if possible).
368 class BufferPlacementPromotion : BufferPlacementTransformationBase {
369 public:
370   BufferPlacementPromotion(Operation *op)
371       : BufferPlacementTransformationBase(op) {}
372 
373   /// Promote buffers to stack-based allocations.
374   void promote(function_ref<bool(Value)> isSmallAlloc) {
375     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
376       Value alloc = std::get<0>(entry);
377       Operation *dealloc = std::get<1>(entry);
378       // Checking several requirements to transform an AllocOp into an AllocaOp.
379       // The transformation is done if the allocation is limited to a given
380       // size. Furthermore, a deallocation must not be defined for this
381       // allocation entry and a parent allocation scope must exist.
382       if (!isSmallAlloc(alloc) || dealloc ||
383           !hasAllocationScope(alloc, aliases))
384         continue;
385 
386       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
387           alloc, alloc.getParentBlock(), liveness);
388       // Build a new alloca that is associated with its parent
389       // `AutomaticAllocationScope` determined during the initialization phase.
390       OpBuilder builder(startOperation);
391       Operation *allocOp = alloc.getDefiningOp();
392       if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
393         Operation *alloca =
394             allocInterface.buildPromotedAlloc(builder, alloc).value();
395         if (!alloca)
396           continue;
397         // Replace the original alloc by a newly created alloca.
398         allocOp->replaceAllUsesWith(alloca);
399         allocOp->erase();
400       }
401     }
402   }
403 };
404 
405 //===----------------------------------------------------------------------===//
406 // BufferOptimizationPasses
407 //===----------------------------------------------------------------------===//
408 
409 /// The buffer hoisting pass that hoists allocation nodes into dominating
410 /// blocks.
411 struct BufferHoistingPass
412     : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> {
413 
414   void runOnOperation() override {
415     // Hoist all allocations into dominator blocks.
416     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
417         getOperation());
418     optimizer.hoist();
419   }
420 };
421 
422 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
423 struct BufferLoopHoistingPass
424     : public bufferization::impl::BufferLoopHoistingBase<
425           BufferLoopHoistingPass> {
426 
427   void runOnOperation() override {
428     // Hoist all allocations out of loops.
429     hoistBuffersFromLoops(getOperation());
430   }
431 };
432 
433 /// The promote buffer to stack pass that tries to convert alloc nodes into
434 /// alloca nodes.
435 class PromoteBuffersToStackPass
436     : public bufferization::impl::PromoteBuffersToStackBase<
437           PromoteBuffersToStackPass> {
438 public:
439   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
440                             unsigned maxRankOfAllocatedMemRef) {
441     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
442     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
443   }
444 
445   explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
446       : isSmallAlloc(std::move(isSmallAlloc)) {}
447 
448   LogicalResult initialize(MLIRContext *context) override {
449     if (isSmallAlloc == nullptr) {
450       isSmallAlloc = [=](Value alloc) {
451         return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
452                                    maxRankOfAllocatedMemRef);
453       };
454     }
455     return success();
456   }
457 
458   void runOnOperation() override {
459     // Move all allocation nodes and convert candidates into allocas.
460     BufferPlacementPromotion optimizer(getOperation());
461     optimizer.promote(isSmallAlloc);
462   }
463 
464 private:
465   std::function<bool(Value)> isSmallAlloc;
466 };
467 
468 } // namespace
469 
470 void mlir::bufferization::hoistBuffersFromLoops(Operation *op) {
471   BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
472   optimizer.hoist();
473 }
474 
475 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
476   return std::make_unique<BufferHoistingPass>();
477 }
478 
479 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
480   return std::make_unique<BufferLoopHoistingPass>();
481 }
482 
483 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
484     unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
485   return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
486                                                      maxRankOfAllocatedMemRef);
487 }
488 
489 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
490     std::function<bool(Value)> isSmallAlloc) {
491   return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
492 }
493