1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements logic for three optimization passes. The first two 10 // passes try to move alloc nodes out of blocks to reduce the number of 11 // allocations and copies during buffer deallocation. The third pass tries to 12 // convert heap-based allocations to stack-based allocations, if possible. 13 14 #include "mlir/Dialect/Bufferization/Transforms/Passes.h" 15 16 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" 17 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" 18 #include "mlir/Dialect/Func/IR/FuncOps.h" 19 #include "mlir/Dialect/MemRef/IR/MemRef.h" 20 #include "mlir/IR/Operation.h" 21 #include "mlir/Interfaces/LoopLikeInterface.h" 22 #include "mlir/Pass/Pass.h" 23 24 namespace mlir { 25 namespace bufferization { 26 #define GEN_PASS_DEF_BUFFERHOISTING 27 #define GEN_PASS_DEF_BUFFERLOOPHOISTING 28 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK 29 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc" 30 } // namespace bufferization 31 } // namespace mlir 32 33 using namespace mlir; 34 using namespace mlir::bufferization; 35 36 /// Returns true if the given operation implements a known high-level region- 37 /// based control-flow interface. 38 static bool isKnownControlFlowInterface(Operation *op) { 39 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op); 40 } 41 42 /// Check if the size of the allocation is less than the given size. The 43 /// transformation is only applied to small buffers since large buffers could 44 /// exceed the stack space. 45 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, 46 unsigned maxRankOfAllocatedMemRef) { 47 auto type = dyn_cast<ShapedType>(alloc.getType()); 48 if (!type || !alloc.getDefiningOp<memref::AllocOp>()) 49 return false; 50 if (!type.hasStaticShape()) { 51 // Check if the dynamic shape dimension of the alloc is produced by 52 // `memref.rank`. If this is the case, it is likely to be small. 53 // Furthermore, the dimension is limited to the maximum rank of the 54 // allocated memref to avoid large values by multiplying several small 55 // values. 56 if (type.getRank() <= maxRankOfAllocatedMemRef) { 57 return llvm::all_of(alloc.getDefiningOp()->getOperands(), 58 [&](Value operand) { 59 return operand.getDefiningOp<memref::RankOp>(); 60 }); 61 } 62 return false; 63 } 64 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp()) 65 .getTypeSizeInBits(type.getElementType()); 66 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8; 67 } 68 69 /// Checks whether the given aliases leave the allocation scope. 70 static bool 71 leavesAllocationScope(Region *parentRegion, 72 const BufferViewFlowAnalysis::ValueSetT &aliases) { 73 for (Value alias : aliases) { 74 for (auto *use : alias.getUsers()) { 75 // If there is at least one alias that leaves the parent region, we know 76 // that this alias escapes the whole region and hence the associated 77 // allocation leaves allocation scope. 78 if (isa<RegionBranchTerminatorOpInterface>(use) && 79 use->getParentRegion() == parentRegion) 80 return true; 81 } 82 } 83 return false; 84 } 85 86 /// Checks, if an automated allocation scope for a given alloc value exists. 87 static bool hasAllocationScope(Value alloc, 88 const BufferViewFlowAnalysis &aliasAnalysis) { 89 Region *region = alloc.getParentRegion(); 90 do { 91 if (Operation *parentOp = region->getParentOp()) { 92 // Check if the operation is an automatic allocation scope and whether an 93 // alias leaves the scope. This means, an allocation yields out of 94 // this scope and can not be transformed in a stack-based allocation. 95 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() && 96 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc))) 97 return true; 98 // Check if the operation is a known control flow interface and break the 99 // loop to avoid transformation in loops. Furthermore skip transformation 100 // if the operation does not implement a RegionBeanchOpInterface. 101 if (BufferPlacementTransformationBase::isLoop(parentOp) || 102 !isKnownControlFlowInterface(parentOp)) 103 break; 104 } 105 } while ((region = region->getParentRegion())); 106 return false; 107 } 108 109 namespace { 110 111 //===----------------------------------------------------------------------===// 112 // BufferAllocationHoisting 113 //===----------------------------------------------------------------------===// 114 115 /// A base implementation compatible with the `BufferAllocationHoisting` class. 116 struct BufferAllocationHoistingStateBase { 117 /// A pointer to the current dominance info. 118 DominanceInfo *dominators; 119 120 /// The current allocation value. 121 Value allocValue; 122 123 /// The current placement block (if any). 124 Block *placementBlock; 125 126 /// Initializes the state base. 127 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue, 128 Block *placementBlock) 129 : dominators(dominators), allocValue(allocValue), 130 placementBlock(placementBlock) {} 131 }; 132 133 /// Implements the actual hoisting logic for allocation nodes. 134 template <typename StateT> 135 class BufferAllocationHoisting : public BufferPlacementTransformationBase { 136 public: 137 BufferAllocationHoisting(Operation *op) 138 : BufferPlacementTransformationBase(op), dominators(op), 139 postDominators(op), scopeOp(op) {} 140 141 /// Moves allocations upwards. 142 void hoist() { 143 SmallVector<Value> allocsAndAllocas; 144 for (BufferPlacementAllocs::AllocEntry &entry : allocs) 145 allocsAndAllocas.push_back(std::get<0>(entry)); 146 scopeOp->walk([&](memref::AllocaOp op) { 147 allocsAndAllocas.push_back(op.getMemref()); 148 }); 149 150 for (auto allocValue : allocsAndAllocas) { 151 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp())) 152 continue; 153 Operation *definingOp = allocValue.getDefiningOp(); 154 assert(definingOp && "No defining op"); 155 auto operands = definingOp->getOperands(); 156 auto resultAliases = aliases.resolve(allocValue); 157 // Determine the common dominator block of all aliases. 158 Block *dominatorBlock = 159 findCommonDominator(allocValue, resultAliases, dominators); 160 // Init the initial hoisting state. 161 StateT state(&dominators, allocValue, allocValue.getParentBlock()); 162 // Check for additional allocation dependencies to compute an upper bound 163 // for hoisting. 164 Block *dependencyBlock = nullptr; 165 // If this node has dependencies, check all dependent nodes. This ensures 166 // that all dependency values have been computed before allocating the 167 // buffer. 168 for (Value depValue : operands) { 169 Block *depBlock = depValue.getParentBlock(); 170 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock)) 171 dependencyBlock = depBlock; 172 } 173 174 // Find the actual placement block and determine the start operation using 175 // an upper placement-block boundary. The idea is that placement block 176 // cannot be moved any further upwards than the given upper bound. 177 Block *placementBlock = findPlacementBlock( 178 state, state.computeUpperBound(dominatorBlock, dependencyBlock)); 179 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 180 allocValue, placementBlock, liveness); 181 182 // Move the alloc in front of the start operation. 183 Operation *allocOperation = allocValue.getDefiningOp(); 184 allocOperation->moveBefore(startOperation); 185 } 186 } 187 188 private: 189 /// Finds a valid placement block by walking upwards in the CFG until we 190 /// either cannot continue our walk due to constraints (given by the StateT 191 /// implementation) or we have reached the upper-most dominator block. 192 Block *findPlacementBlock(StateT &state, Block *upperBound) { 193 Block *currentBlock = state.placementBlock; 194 // Walk from the innermost regions/loops to the outermost regions/loops and 195 // find an appropriate placement block that satisfies the constraint of the 196 // current StateT implementation. Walk until we reach the upperBound block 197 // (if any). 198 199 // If we are not able to find a valid parent operation or an associated 200 // parent block, break the walk loop. 201 Operation *parentOp; 202 Block *parentBlock; 203 while ((parentOp = currentBlock->getParentOp()) && 204 (parentBlock = parentOp->getBlock()) && 205 (!upperBound || 206 dominators.properlyDominates(upperBound, currentBlock))) { 207 // Try to find an immediate dominator and check whether the parent block 208 // is above the immediate dominator (if any). 209 DominanceInfoNode *idom = nullptr; 210 211 // DominanceInfo doesn't support getNode queries for single-block regions. 212 if (!currentBlock->isEntryBlock()) 213 idom = dominators.getNode(currentBlock)->getIDom(); 214 215 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) { 216 // If the current immediate dominator is below the placement block, move 217 // to the immediate dominator block. 218 currentBlock = idom->getBlock(); 219 state.recordMoveToDominator(currentBlock); 220 } else { 221 // We have to move to our parent block since an immediate dominator does 222 // either not exist or is above our parent block. If we cannot move to 223 // our parent operation due to constraints given by the StateT 224 // implementation, break the walk loop. Furthermore, we should not move 225 // allocations out of unknown region-based control-flow operations. 226 if (!isKnownControlFlowInterface(parentOp) || 227 !state.isLegalPlacement(parentOp)) 228 break; 229 // Move to our parent block by notifying the current StateT 230 // implementation. 231 currentBlock = parentBlock; 232 state.recordMoveToParent(currentBlock); 233 } 234 } 235 // Return the finally determined placement block. 236 return state.placementBlock; 237 } 238 239 /// The dominator info to find the appropriate start operation to move the 240 /// allocs. 241 DominanceInfo dominators; 242 243 /// The post dominator info to move the dependent allocs in the right 244 /// position. 245 PostDominanceInfo postDominators; 246 247 /// The map storing the final placement blocks of a given alloc value. 248 llvm::DenseMap<Value, Block *> placementBlocks; 249 250 /// The operation that this transformation is working on. It is used to also 251 /// gather allocas. 252 Operation *scopeOp; 253 }; 254 255 /// A state implementation compatible with the `BufferAllocationHoisting` class 256 /// that hoists allocations into dominator blocks while keeping them inside of 257 /// loops. 258 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase { 259 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 260 261 /// Computes the upper bound for the placement block search. 262 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 263 // If we do not have a dependency block, the upper bound is given by the 264 // dominator block. 265 if (!dependencyBlock) 266 return dominatorBlock; 267 268 // Find the "lower" block of the dominator and the dependency block to 269 // ensure that we do not move allocations above this block. 270 return dominators->properlyDominates(dominatorBlock, dependencyBlock) 271 ? dependencyBlock 272 : dominatorBlock; 273 } 274 275 /// Returns true if the given operation does not represent a loop. 276 bool isLegalPlacement(Operation *op) { 277 return !BufferPlacementTransformationBase::isLoop(op); 278 } 279 280 /// Returns true if the given operation should be considered for hoisting. 281 static bool shouldHoistOpType(Operation *op) { 282 return llvm::isa<memref::AllocOp>(op); 283 } 284 285 /// Sets the current placement block to the given block. 286 void recordMoveToDominator(Block *block) { placementBlock = block; } 287 288 /// Sets the current placement block to the given block. 289 void recordMoveToParent(Block *block) { recordMoveToDominator(block); } 290 }; 291 292 /// A state implementation compatible with the `BufferAllocationHoisting` class 293 /// that hoists allocations out of loops. 294 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { 295 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 296 297 /// Remembers the dominator block of all aliases. 298 Block *aliasDominatorBlock = nullptr; 299 300 /// Computes the upper bound for the placement block search. 301 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 302 aliasDominatorBlock = dominatorBlock; 303 // If there is a dependency block, we have to use this block as an upper 304 // bound to satisfy all allocation value dependencies. 305 return dependencyBlock ? dependencyBlock : nullptr; 306 } 307 308 /// Returns true if the given operation represents a loop and one of the 309 /// aliases caused the `aliasDominatorBlock` to be "above" the block of the 310 /// given loop operation. If this is the case, it indicates that the 311 /// allocation is passed via a back edge. 312 bool isLegalPlacement(Operation *op) { 313 return BufferPlacementTransformationBase::isLoop(op) && 314 !dominators->dominates(aliasDominatorBlock, op->getBlock()); 315 } 316 317 /// Returns true if the given operation should be considered for hoisting. 318 static bool shouldHoistOpType(Operation *op) { 319 return llvm::isa<memref::AllocOp, memref::AllocaOp>(op); 320 } 321 322 /// Does not change the internal placement block, as we want to move 323 /// operations out of loops only. 324 void recordMoveToDominator(Block *block) {} 325 326 /// Sets the current placement block to the given block. 327 void recordMoveToParent(Block *block) { placementBlock = block; } 328 }; 329 330 //===----------------------------------------------------------------------===// 331 // BufferPlacementPromotion 332 //===----------------------------------------------------------------------===// 333 334 /// Promotes heap-based allocations to stack-based allocations (if possible). 335 class BufferPlacementPromotion : BufferPlacementTransformationBase { 336 public: 337 BufferPlacementPromotion(Operation *op) 338 : BufferPlacementTransformationBase(op) {} 339 340 /// Promote buffers to stack-based allocations. 341 void promote(function_ref<bool(Value)> isSmallAlloc) { 342 for (BufferPlacementAllocs::AllocEntry &entry : allocs) { 343 Value alloc = std::get<0>(entry); 344 Operation *dealloc = std::get<1>(entry); 345 // Checking several requirements to transform an AllocOp into an AllocaOp. 346 // The transformation is done if the allocation is limited to a given 347 // size. Furthermore, a deallocation must not be defined for this 348 // allocation entry and a parent allocation scope must exist. 349 if (!isSmallAlloc(alloc) || dealloc || 350 !hasAllocationScope(alloc, aliases)) 351 continue; 352 353 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 354 alloc, alloc.getParentBlock(), liveness); 355 // Build a new alloca that is associated with its parent 356 // `AutomaticAllocationScope` determined during the initialization phase. 357 OpBuilder builder(startOperation); 358 Operation *allocOp = alloc.getDefiningOp(); 359 Operation *alloca = builder.create<memref::AllocaOp>( 360 alloc.getLoc(), cast<MemRefType>(alloc.getType()), 361 allocOp->getOperands(), allocOp->getAttrs()); 362 363 // Replace the original alloc by a newly created alloca. 364 allocOp->replaceAllUsesWith(alloca); 365 allocOp->erase(); 366 } 367 } 368 }; 369 370 //===----------------------------------------------------------------------===// 371 // BufferOptimizationPasses 372 //===----------------------------------------------------------------------===// 373 374 /// The buffer hoisting pass that hoists allocation nodes into dominating 375 /// blocks. 376 struct BufferHoistingPass 377 : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> { 378 379 void runOnOperation() override { 380 // Hoist all allocations into dominator blocks. 381 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer( 382 getOperation()); 383 optimizer.hoist(); 384 } 385 }; 386 387 /// The buffer loop hoisting pass that hoists allocation nodes out of loops. 388 struct BufferLoopHoistingPass 389 : public bufferization::impl::BufferLoopHoistingBase< 390 BufferLoopHoistingPass> { 391 392 void runOnOperation() override { 393 // Hoist all allocations out of loops. 394 hoistBuffersFromLoops(getOperation()); 395 } 396 }; 397 398 /// The promote buffer to stack pass that tries to convert alloc nodes into 399 /// alloca nodes. 400 class PromoteBuffersToStackPass 401 : public bufferization::impl::PromoteBuffersToStackBase< 402 PromoteBuffersToStackPass> { 403 public: 404 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes, 405 unsigned maxRankOfAllocatedMemRef) { 406 this->maxAllocSizeInBytes = maxAllocSizeInBytes; 407 this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef; 408 } 409 410 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) 411 : isSmallAlloc(std::move(isSmallAlloc)) {} 412 413 LogicalResult initialize(MLIRContext *context) override { 414 if (isSmallAlloc == nullptr) { 415 isSmallAlloc = [=](Value alloc) { 416 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes, 417 maxRankOfAllocatedMemRef); 418 }; 419 } 420 return success(); 421 } 422 423 void runOnOperation() override { 424 // Move all allocation nodes and convert candidates into allocas. 425 BufferPlacementPromotion optimizer(getOperation()); 426 optimizer.promote(isSmallAlloc); 427 } 428 429 private: 430 std::function<bool(Value)> isSmallAlloc; 431 }; 432 433 } // namespace 434 435 void mlir::bufferization::hoistBuffersFromLoops(Operation *op) { 436 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op); 437 optimizer.hoist(); 438 } 439 440 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() { 441 return std::make_unique<BufferHoistingPass>(); 442 } 443 444 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() { 445 return std::make_unique<BufferLoopHoistingPass>(); 446 } 447 448 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 449 unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) { 450 return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes, 451 maxRankOfAllocatedMemRef); 452 } 453 454 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 455 std::function<bool(Value)> isSmallAlloc) { 456 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc)); 457 } 458