xref: /llvm-project/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp (revision 136d746ec7f43584f68c11d3ccc4088db4734d29)
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "PassDetail.h"
15 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
16 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
17 #include "mlir/Dialect/MemRef/IR/MemRef.h"
18 #include "mlir/IR/Operation.h"
19 #include "mlir/Interfaces/LoopLikeInterface.h"
20 #include "mlir/Pass/Pass.h"
21 
22 using namespace mlir;
23 using namespace mlir::bufferization;
24 
25 /// Returns true if the given operation implements a known high-level region-
26 /// based control-flow interface.
27 static bool isKnownControlFlowInterface(Operation *op) {
28   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
29 }
30 
31 /// Check if the size of the allocation is less than the given size. The
32 /// transformation is only applied to small buffers since large buffers could
33 /// exceed the stack space.
34 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
35                                 unsigned maxRankOfAllocatedMemRef) {
36   auto type = alloc.getType().dyn_cast<ShapedType>();
37   if (!type || !alloc.getDefiningOp<memref::AllocOp>())
38     return false;
39   if (!type.hasStaticShape()) {
40     // Check if the dynamic shape dimension of the alloc is produced by
41     // `memref.rank`. If this is the case, it is likely to be small.
42     // Furthermore, the dimension is limited to the maximum rank of the
43     // allocated memref to avoid large values by multiplying several small
44     // values.
45     if (type.getRank() <= maxRankOfAllocatedMemRef) {
46       return llvm::all_of(alloc.getDefiningOp()->getOperands(),
47                           [&](Value operand) {
48                             return operand.getDefiningOp<memref::RankOp>();
49                           });
50     }
51     return false;
52   }
53   unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
54                           .getTypeSizeInBits(type.getElementType());
55   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
56 }
57 
58 /// Checks whether the given aliases leave the allocation scope.
59 static bool
60 leavesAllocationScope(Region *parentRegion,
61                       const BufferViewFlowAnalysis::ValueSetT &aliases) {
62   for (Value alias : aliases) {
63     for (auto *use : alias.getUsers()) {
64       // If there is at least one alias that leaves the parent region, we know
65       // that this alias escapes the whole region and hence the associated
66       // allocation leaves allocation scope.
67       if (isRegionReturnLike(use) && use->getParentRegion() == parentRegion)
68         return true;
69     }
70   }
71   return false;
72 }
73 
74 /// Checks, if an automated allocation scope for a given alloc value exists.
75 static bool hasAllocationScope(Value alloc,
76                                const BufferViewFlowAnalysis &aliasAnalysis) {
77   Region *region = alloc.getParentRegion();
78   do {
79     if (Operation *parentOp = region->getParentOp()) {
80       // Check if the operation is an automatic allocation scope and whether an
81       // alias leaves the scope. This means, an allocation yields out of
82       // this scope and can not be transformed in a stack-based allocation.
83       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
84           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
85         return true;
86       // Check if the operation is a known control flow interface and break the
87       // loop to avoid transformation in loops. Furthermore skip transformation
88       // if the operation does not implement a RegionBeanchOpInterface.
89       if (BufferPlacementTransformationBase::isLoop(parentOp) ||
90           !isKnownControlFlowInterface(parentOp))
91         break;
92     }
93   } while ((region = region->getParentRegion()));
94   return false;
95 }
96 
97 namespace {
98 
99 //===----------------------------------------------------------------------===//
100 // BufferAllocationHoisting
101 //===----------------------------------------------------------------------===//
102 
103 /// A base implementation compatible with the `BufferAllocationHoisting` class.
104 struct BufferAllocationHoistingStateBase {
105   /// A pointer to the current dominance info.
106   DominanceInfo *dominators;
107 
108   /// The current allocation value.
109   Value allocValue;
110 
111   /// The current placement block (if any).
112   Block *placementBlock;
113 
114   /// Initializes the state base.
115   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
116                                     Block *placementBlock)
117       : dominators(dominators), allocValue(allocValue),
118         placementBlock(placementBlock) {}
119 };
120 
121 /// Implements the actual hoisting logic for allocation nodes.
122 template <typename StateT>
123 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
124 public:
125   BufferAllocationHoisting(Operation *op)
126       : BufferPlacementTransformationBase(op), dominators(op),
127         postDominators(op), scopeOp(op) {}
128 
129   /// Moves allocations upwards.
130   void hoist() {
131     SmallVector<Value> allocsAndAllocas;
132     for (BufferPlacementAllocs::AllocEntry &entry : allocs)
133       allocsAndAllocas.push_back(std::get<0>(entry));
134     scopeOp->walk([&](memref::AllocaOp op) {
135       allocsAndAllocas.push_back(op.getMemref());
136     });
137 
138     for (auto allocValue : allocsAndAllocas) {
139       if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
140         continue;
141       Operation *definingOp = allocValue.getDefiningOp();
142       assert(definingOp && "No defining op");
143       auto operands = definingOp->getOperands();
144       auto resultAliases = aliases.resolve(allocValue);
145       // Determine the common dominator block of all aliases.
146       Block *dominatorBlock =
147           findCommonDominator(allocValue, resultAliases, dominators);
148       // Init the initial hoisting state.
149       StateT state(&dominators, allocValue, allocValue.getParentBlock());
150       // Check for additional allocation dependencies to compute an upper bound
151       // for hoisting.
152       Block *dependencyBlock = nullptr;
153       // If this node has dependencies, check all dependent nodes. This ensures
154       // that all dependency values have been computed before allocating the
155       // buffer.
156       for (Value depValue : operands) {
157         Block *depBlock = depValue.getParentBlock();
158         if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
159           dependencyBlock = depBlock;
160       }
161 
162       // Find the actual placement block and determine the start operation using
163       // an upper placement-block boundary. The idea is that placement block
164       // cannot be moved any further upwards than the given upper bound.
165       Block *placementBlock = findPlacementBlock(
166           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
167       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
168           allocValue, placementBlock, liveness);
169 
170       // Move the alloc in front of the start operation.
171       Operation *allocOperation = allocValue.getDefiningOp();
172       allocOperation->moveBefore(startOperation);
173     }
174   }
175 
176 private:
177   /// Finds a valid placement block by walking upwards in the CFG until we
178   /// either cannot continue our walk due to constraints (given by the StateT
179   /// implementation) or we have reached the upper-most dominator block.
180   Block *findPlacementBlock(StateT &state, Block *upperBound) {
181     Block *currentBlock = state.placementBlock;
182     // Walk from the innermost regions/loops to the outermost regions/loops and
183     // find an appropriate placement block that satisfies the constraint of the
184     // current StateT implementation. Walk until we reach the upperBound block
185     // (if any).
186 
187     // If we are not able to find a valid parent operation or an associated
188     // parent block, break the walk loop.
189     Operation *parentOp;
190     Block *parentBlock;
191     while ((parentOp = currentBlock->getParentOp()) &&
192            (parentBlock = parentOp->getBlock()) &&
193            (!upperBound ||
194             dominators.properlyDominates(upperBound, currentBlock))) {
195       // Try to find an immediate dominator and check whether the parent block
196       // is above the immediate dominator (if any).
197       DominanceInfoNode *idom = nullptr;
198 
199       // DominanceInfo doesn't support getNode queries for single-block regions.
200       if (!currentBlock->isEntryBlock())
201         idom = dominators.getNode(currentBlock)->getIDom();
202 
203       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
204         // If the current immediate dominator is below the placement block, move
205         // to the immediate dominator block.
206         currentBlock = idom->getBlock();
207         state.recordMoveToDominator(currentBlock);
208       } else {
209         // We have to move to our parent block since an immediate dominator does
210         // either not exist or is above our parent block. If we cannot move to
211         // our parent operation due to constraints given by the StateT
212         // implementation, break the walk loop. Furthermore, we should not move
213         // allocations out of unknown region-based control-flow operations.
214         if (!isKnownControlFlowInterface(parentOp) ||
215             !state.isLegalPlacement(parentOp))
216           break;
217         // Move to our parent block by notifying the current StateT
218         // implementation.
219         currentBlock = parentBlock;
220         state.recordMoveToParent(currentBlock);
221       }
222     }
223     // Return the finally determined placement block.
224     return state.placementBlock;
225   }
226 
227   /// The dominator info to find the appropriate start operation to move the
228   /// allocs.
229   DominanceInfo dominators;
230 
231   /// The post dominator info to move the dependent allocs in the right
232   /// position.
233   PostDominanceInfo postDominators;
234 
235   /// The map storing the final placement blocks of a given alloc value.
236   llvm::DenseMap<Value, Block *> placementBlocks;
237 
238   /// The operation that this transformation is working on. It is used to also
239   /// gather allocas.
240   Operation *scopeOp;
241 };
242 
243 /// A state implementation compatible with the `BufferAllocationHoisting` class
244 /// that hoists allocations into dominator blocks while keeping them inside of
245 /// loops.
246 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
247   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
248 
249   /// Computes the upper bound for the placement block search.
250   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
251     // If we do not have a dependency block, the upper bound is given by the
252     // dominator block.
253     if (!dependencyBlock)
254       return dominatorBlock;
255 
256     // Find the "lower" block of the dominator and the dependency block to
257     // ensure that we do not move allocations above this block.
258     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
259                ? dependencyBlock
260                : dominatorBlock;
261   }
262 
263   /// Returns true if the given operation does not represent a loop.
264   bool isLegalPlacement(Operation *op) {
265     return !BufferPlacementTransformationBase::isLoop(op);
266   }
267 
268   /// Returns true if the given operation should be considered for hoisting.
269   static bool shouldHoistOpType(Operation *op) {
270     return llvm::isa<memref::AllocOp>(op);
271   }
272 
273   /// Sets the current placement block to the given block.
274   void recordMoveToDominator(Block *block) { placementBlock = block; }
275 
276   /// Sets the current placement block to the given block.
277   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
278 };
279 
280 /// A state implementation compatible with the `BufferAllocationHoisting` class
281 /// that hoists allocations out of loops.
282 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
283   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
284 
285   /// Remembers the dominator block of all aliases.
286   Block *aliasDominatorBlock = nullptr;
287 
288   /// Computes the upper bound for the placement block search.
289   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
290     aliasDominatorBlock = dominatorBlock;
291     // If there is a dependency block, we have to use this block as an upper
292     // bound to satisfy all allocation value dependencies.
293     return dependencyBlock ? dependencyBlock : nullptr;
294   }
295 
296   /// Returns true if the given operation represents a loop and one of the
297   /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
298   /// given loop operation. If this is the case, it indicates that the
299   /// allocation is passed via a back edge.
300   bool isLegalPlacement(Operation *op) {
301     return BufferPlacementTransformationBase::isLoop(op) &&
302            !dominators->dominates(aliasDominatorBlock, op->getBlock());
303   }
304 
305   /// Returns true if the given operation should be considered for hoisting.
306   static bool shouldHoistOpType(Operation *op) {
307     return llvm::isa<memref::AllocOp, memref::AllocaOp>(op);
308   }
309 
310   /// Does not change the internal placement block, as we want to move
311   /// operations out of loops only.
312   void recordMoveToDominator(Block *block) {}
313 
314   /// Sets the current placement block to the given block.
315   void recordMoveToParent(Block *block) { placementBlock = block; }
316 };
317 
318 //===----------------------------------------------------------------------===//
319 // BufferPlacementPromotion
320 //===----------------------------------------------------------------------===//
321 
322 /// Promotes heap-based allocations to stack-based allocations (if possible).
323 class BufferPlacementPromotion : BufferPlacementTransformationBase {
324 public:
325   BufferPlacementPromotion(Operation *op)
326       : BufferPlacementTransformationBase(op) {}
327 
328   /// Promote buffers to stack-based allocations.
329   void promote(function_ref<bool(Value)> isSmallAlloc) {
330     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
331       Value alloc = std::get<0>(entry);
332       Operation *dealloc = std::get<1>(entry);
333       // Checking several requirements to transform an AllocOp into an AllocaOp.
334       // The transformation is done if the allocation is limited to a given
335       // size. Furthermore, a deallocation must not be defined for this
336       // allocation entry and a parent allocation scope must exist.
337       if (!isSmallAlloc(alloc) || dealloc ||
338           !hasAllocationScope(alloc, aliases))
339         continue;
340 
341       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
342           alloc, alloc.getParentBlock(), liveness);
343       // Build a new alloca that is associated with its parent
344       // `AutomaticAllocationScope` determined during the initialization phase.
345       OpBuilder builder(startOperation);
346       Operation *allocOp = alloc.getDefiningOp();
347       Operation *alloca = builder.create<memref::AllocaOp>(
348           alloc.getLoc(), alloc.getType().cast<MemRefType>(),
349           allocOp->getOperands());
350 
351       // Replace the original alloc by a newly created alloca.
352       allocOp->replaceAllUsesWith(alloca);
353       allocOp->erase();
354     }
355   }
356 };
357 
358 //===----------------------------------------------------------------------===//
359 // BufferOptimizationPasses
360 //===----------------------------------------------------------------------===//
361 
362 /// The buffer hoisting pass that hoists allocation nodes into dominating
363 /// blocks.
364 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
365 
366   void runOnOperation() override {
367     // Hoist all allocations into dominator blocks.
368     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
369         getOperation());
370     optimizer.hoist();
371   }
372 };
373 
374 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
375 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
376 
377   void runOnOperation() override {
378     // Hoist all allocations out of loops.
379     BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
380         getOperation());
381     optimizer.hoist();
382   }
383 };
384 
385 /// The promote buffer to stack pass that tries to convert alloc nodes into
386 /// alloca nodes.
387 class PromoteBuffersToStackPass
388     : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
389 public:
390   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
391                             unsigned maxRankOfAllocatedMemRef) {
392     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
393     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
394   }
395 
396   explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
397       : isSmallAlloc(std::move(isSmallAlloc)) {}
398 
399   LogicalResult initialize(MLIRContext *context) override {
400     if (isSmallAlloc == nullptr) {
401       isSmallAlloc = [=](Value alloc) {
402         return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
403                                    maxRankOfAllocatedMemRef);
404       };
405     }
406     return success();
407   }
408 
409   void runOnOperation() override {
410     // Move all allocation nodes and convert candidates into allocas.
411     BufferPlacementPromotion optimizer(getOperation());
412     optimizer.promote(isSmallAlloc);
413   }
414 
415 private:
416   std::function<bool(Value)> isSmallAlloc;
417 };
418 
419 } // namespace
420 
421 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
422   return std::make_unique<BufferHoistingPass>();
423 }
424 
425 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
426   return std::make_unique<BufferLoopHoistingPass>();
427 }
428 
429 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
430     unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
431   return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
432                                                      maxRankOfAllocatedMemRef);
433 }
434 
435 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
436     std::function<bool(Value)> isSmallAlloc) {
437   return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
438 }
439