1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements logic for three optimization passes. The first two 10 // passes try to move alloc nodes out of blocks to reduce the number of 11 // allocations and copies during buffer deallocation. The third pass tries to 12 // convert heap-based allocations to stack-based allocations, if possible. 13 14 #include "mlir/Dialect/Bufferization/Transforms/Passes.h" 15 16 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" 17 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" 18 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" 19 #include "mlir/Dialect/Func/IR/FuncOps.h" 20 #include "mlir/Dialect/MemRef/IR/MemRef.h" 21 #include "mlir/IR/Operation.h" 22 #include "mlir/Interfaces/LoopLikeInterface.h" 23 #include "mlir/Pass/Pass.h" 24 25 namespace mlir { 26 namespace bufferization { 27 #define GEN_PASS_DEF_BUFFERHOISTING 28 #define GEN_PASS_DEF_BUFFERLOOPHOISTING 29 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK 30 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc" 31 } // namespace bufferization 32 } // namespace mlir 33 34 using namespace mlir; 35 using namespace mlir::bufferization; 36 37 /// Returns true if the given operation implements a known high-level region- 38 /// based control-flow interface. 39 static bool isKnownControlFlowInterface(Operation *op) { 40 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op); 41 } 42 43 /// Returns true if the given operation represents a loop by testing whether it 44 /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In 45 /// the case of a `RegionBranchOpInterface`, it checks all region-based control- 46 /// flow edges for cycles. 47 static bool isLoop(Operation *op) { 48 // If the operation implements the `LoopLikeOpInterface` it can be considered 49 // a loop. 50 if (isa<LoopLikeOpInterface>(op)) 51 return true; 52 53 // If the operation does not implement the `RegionBranchOpInterface`, it is 54 // (currently) not possible to detect a loop. 55 auto regionInterface = dyn_cast<RegionBranchOpInterface>(op); 56 if (!regionInterface) 57 return false; 58 59 return regionInterface.hasLoop(); 60 } 61 62 /// Return whether the given operation is a loop with sequential execution 63 /// semantics. 64 static bool isSequentialLoop(Operation *op) { 65 return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op); 66 } 67 68 /// Returns true if the given operation implements the AllocationOpInterface 69 /// and it supports the dominate block hoisting. 70 static bool allowAllocDominateBlockHoisting(Operation *op) { 71 auto allocOp = dyn_cast<AllocationOpInterface>(op); 72 return allocOp && 73 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block); 74 } 75 76 /// Returns true if the given operation implements the AllocationOpInterface 77 /// and it supports the loop hoisting. 78 static bool allowAllocLoopHoisting(Operation *op) { 79 auto allocOp = dyn_cast<AllocationOpInterface>(op); 80 return allocOp && 81 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop); 82 } 83 84 /// Check if the size of the allocation is less than the given size. The 85 /// transformation is only applied to small buffers since large buffers could 86 /// exceed the stack space. 87 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, 88 unsigned maxRankOfAllocatedMemRef) { 89 auto type = dyn_cast<ShapedType>(alloc.getType()); 90 if (!type || !alloc.getDefiningOp<memref::AllocOp>()) 91 return false; 92 if (!type.hasStaticShape()) { 93 // Check if the dynamic shape dimension of the alloc is produced by 94 // `memref.rank`. If this is the case, it is likely to be small. 95 // Furthermore, the dimension is limited to the maximum rank of the 96 // allocated memref to avoid large values by multiplying several small 97 // values. 98 if (type.getRank() <= maxRankOfAllocatedMemRef) { 99 return llvm::all_of(alloc.getDefiningOp()->getOperands(), 100 [&](Value operand) { 101 return operand.getDefiningOp<memref::RankOp>(); 102 }); 103 } 104 return false; 105 } 106 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp()) 107 .getTypeSizeInBits(type.getElementType()); 108 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8; 109 } 110 111 /// Checks whether the given aliases leave the allocation scope. 112 static bool 113 leavesAllocationScope(Region *parentRegion, 114 const BufferViewFlowAnalysis::ValueSetT &aliases) { 115 for (Value alias : aliases) { 116 for (auto *use : alias.getUsers()) { 117 // If there is at least one alias that leaves the parent region, we know 118 // that this alias escapes the whole region and hence the associated 119 // allocation leaves allocation scope. 120 if (isa<RegionBranchTerminatorOpInterface>(use) && 121 use->getParentRegion() == parentRegion) 122 return true; 123 } 124 } 125 return false; 126 } 127 128 /// Checks, if an automated allocation scope for a given alloc value exists. 129 static bool hasAllocationScope(Value alloc, 130 const BufferViewFlowAnalysis &aliasAnalysis) { 131 Region *region = alloc.getParentRegion(); 132 do { 133 if (Operation *parentOp = region->getParentOp()) { 134 // Check if the operation is an automatic allocation scope and whether an 135 // alias leaves the scope. This means, an allocation yields out of 136 // this scope and can not be transformed in a stack-based allocation. 137 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() && 138 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc))) 139 return true; 140 // Check if the operation is a known control flow interface and break the 141 // loop to avoid transformation in loops. Furthermore skip transformation 142 // if the operation does not implement a RegionBeanchOpInterface. 143 if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp)) 144 break; 145 } 146 } while ((region = region->getParentRegion())); 147 return false; 148 } 149 150 namespace { 151 152 //===----------------------------------------------------------------------===// 153 // BufferAllocationHoisting 154 //===----------------------------------------------------------------------===// 155 156 /// A base implementation compatible with the `BufferAllocationHoisting` class. 157 struct BufferAllocationHoistingStateBase { 158 /// A pointer to the current dominance info. 159 DominanceInfo *dominators; 160 161 /// The current allocation value. 162 Value allocValue; 163 164 /// The current placement block (if any). 165 Block *placementBlock; 166 167 /// Initializes the state base. 168 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue, 169 Block *placementBlock) 170 : dominators(dominators), allocValue(allocValue), 171 placementBlock(placementBlock) {} 172 }; 173 174 /// Implements the actual hoisting logic for allocation nodes. 175 template <typename StateT> 176 class BufferAllocationHoisting : public BufferPlacementTransformationBase { 177 public: 178 BufferAllocationHoisting(Operation *op) 179 : BufferPlacementTransformationBase(op), dominators(op), 180 postDominators(op), scopeOp(op) {} 181 182 /// Moves allocations upwards. 183 void hoist() { 184 SmallVector<Value> allocsAndAllocas; 185 for (BufferPlacementAllocs::AllocEntry &entry : allocs) 186 allocsAndAllocas.push_back(std::get<0>(entry)); 187 scopeOp->walk([&](memref::AllocaOp op) { 188 allocsAndAllocas.push_back(op.getMemref()); 189 }); 190 191 for (auto allocValue : allocsAndAllocas) { 192 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp())) 193 continue; 194 Operation *definingOp = allocValue.getDefiningOp(); 195 assert(definingOp && "No defining op"); 196 auto operands = definingOp->getOperands(); 197 auto resultAliases = aliases.resolve(allocValue); 198 // Determine the common dominator block of all aliases. 199 Block *dominatorBlock = 200 findCommonDominator(allocValue, resultAliases, dominators); 201 // Init the initial hoisting state. 202 StateT state(&dominators, allocValue, allocValue.getParentBlock()); 203 // Check for additional allocation dependencies to compute an upper bound 204 // for hoisting. 205 Block *dependencyBlock = nullptr; 206 // If this node has dependencies, check all dependent nodes. This ensures 207 // that all dependency values have been computed before allocating the 208 // buffer. 209 for (Value depValue : operands) { 210 Block *depBlock = depValue.getParentBlock(); 211 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock)) 212 dependencyBlock = depBlock; 213 } 214 215 // Find the actual placement block and determine the start operation using 216 // an upper placement-block boundary. The idea is that placement block 217 // cannot be moved any further upwards than the given upper bound. 218 Block *placementBlock = findPlacementBlock( 219 state, state.computeUpperBound(dominatorBlock, dependencyBlock)); 220 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 221 allocValue, placementBlock, liveness); 222 223 // Move the alloc in front of the start operation. 224 Operation *allocOperation = allocValue.getDefiningOp(); 225 allocOperation->moveBefore(startOperation); 226 } 227 } 228 229 private: 230 /// Finds a valid placement block by walking upwards in the CFG until we 231 /// either cannot continue our walk due to constraints (given by the StateT 232 /// implementation) or we have reached the upper-most dominator block. 233 Block *findPlacementBlock(StateT &state, Block *upperBound) { 234 Block *currentBlock = state.placementBlock; 235 // Walk from the innermost regions/loops to the outermost regions/loops and 236 // find an appropriate placement block that satisfies the constraint of the 237 // current StateT implementation. Walk until we reach the upperBound block 238 // (if any). 239 240 // If we are not able to find a valid parent operation or an associated 241 // parent block, break the walk loop. 242 Operation *parentOp; 243 Block *parentBlock; 244 while ((parentOp = currentBlock->getParentOp()) && 245 (parentBlock = parentOp->getBlock()) && 246 (!upperBound || 247 dominators.properlyDominates(upperBound, currentBlock))) { 248 // Try to find an immediate dominator and check whether the parent block 249 // is above the immediate dominator (if any). 250 DominanceInfoNode *idom = nullptr; 251 252 // DominanceInfo doesn't support getNode queries for single-block regions. 253 if (!currentBlock->isEntryBlock()) 254 idom = dominators.getNode(currentBlock)->getIDom(); 255 256 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) { 257 // If the current immediate dominator is below the placement block, move 258 // to the immediate dominator block. 259 currentBlock = idom->getBlock(); 260 state.recordMoveToDominator(currentBlock); 261 } else { 262 // We have to move to our parent block since an immediate dominator does 263 // either not exist or is above our parent block. If we cannot move to 264 // our parent operation due to constraints given by the StateT 265 // implementation, break the walk loop. Furthermore, we should not move 266 // allocations out of unknown region-based control-flow operations. 267 if (!isKnownControlFlowInterface(parentOp) || 268 !state.isLegalPlacement(parentOp)) 269 break; 270 // Move to our parent block by notifying the current StateT 271 // implementation. 272 currentBlock = parentBlock; 273 state.recordMoveToParent(currentBlock); 274 } 275 } 276 // Return the finally determined placement block. 277 return state.placementBlock; 278 } 279 280 /// The dominator info to find the appropriate start operation to move the 281 /// allocs. 282 DominanceInfo dominators; 283 284 /// The post dominator info to move the dependent allocs in the right 285 /// position. 286 PostDominanceInfo postDominators; 287 288 /// The map storing the final placement blocks of a given alloc value. 289 llvm::DenseMap<Value, Block *> placementBlocks; 290 291 /// The operation that this transformation is working on. It is used to also 292 /// gather allocas. 293 Operation *scopeOp; 294 }; 295 296 /// A state implementation compatible with the `BufferAllocationHoisting` class 297 /// that hoists allocations into dominator blocks while keeping them inside of 298 /// loops. 299 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase { 300 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 301 302 /// Computes the upper bound for the placement block search. 303 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 304 // If we do not have a dependency block, the upper bound is given by the 305 // dominator block. 306 if (!dependencyBlock) 307 return dominatorBlock; 308 309 // Find the "lower" block of the dominator and the dependency block to 310 // ensure that we do not move allocations above this block. 311 return dominators->properlyDominates(dominatorBlock, dependencyBlock) 312 ? dependencyBlock 313 : dominatorBlock; 314 } 315 316 /// Returns true if the given operation does not represent a loop. 317 bool isLegalPlacement(Operation *op) { return !isLoop(op); } 318 319 /// Returns true if the given operation should be considered for hoisting. 320 static bool shouldHoistOpType(Operation *op) { 321 return allowAllocDominateBlockHoisting(op); 322 } 323 324 /// Sets the current placement block to the given block. 325 void recordMoveToDominator(Block *block) { placementBlock = block; } 326 327 /// Sets the current placement block to the given block. 328 void recordMoveToParent(Block *block) { recordMoveToDominator(block); } 329 }; 330 331 /// A state implementation compatible with the `BufferAllocationHoisting` class 332 /// that hoists allocations out of loops. 333 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { 334 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 335 336 /// Remembers the dominator block of all aliases. 337 Block *aliasDominatorBlock = nullptr; 338 339 /// Computes the upper bound for the placement block search. 340 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 341 aliasDominatorBlock = dominatorBlock; 342 // If there is a dependency block, we have to use this block as an upper 343 // bound to satisfy all allocation value dependencies. 344 return dependencyBlock ? dependencyBlock : nullptr; 345 } 346 347 /// Returns true if the given operation represents a loop with sequential 348 /// execution semantics and one of the aliases caused the 349 /// `aliasDominatorBlock` to be "above" the block of the given loop operation. 350 /// If this is the case, it indicates that the allocation is passed via a back 351 /// edge. 352 bool isLegalPlacement(Operation *op) { 353 return isSequentialLoop(op) && 354 !dominators->dominates(aliasDominatorBlock, op->getBlock()); 355 } 356 357 /// Returns true if the given operation should be considered for hoisting. 358 static bool shouldHoistOpType(Operation *op) { 359 return allowAllocLoopHoisting(op); 360 } 361 362 /// Does not change the internal placement block, as we want to move 363 /// operations out of loops only. 364 void recordMoveToDominator(Block *block) {} 365 366 /// Sets the current placement block to the given block. 367 void recordMoveToParent(Block *block) { placementBlock = block; } 368 }; 369 370 //===----------------------------------------------------------------------===// 371 // BufferPlacementPromotion 372 //===----------------------------------------------------------------------===// 373 374 /// Promotes heap-based allocations to stack-based allocations (if possible). 375 class BufferPlacementPromotion : BufferPlacementTransformationBase { 376 public: 377 BufferPlacementPromotion(Operation *op) 378 : BufferPlacementTransformationBase(op) {} 379 380 /// Promote buffers to stack-based allocations. 381 void promote(function_ref<bool(Value)> isSmallAlloc) { 382 for (BufferPlacementAllocs::AllocEntry &entry : allocs) { 383 Value alloc = std::get<0>(entry); 384 Operation *dealloc = std::get<1>(entry); 385 // Checking several requirements to transform an AllocOp into an AllocaOp. 386 // The transformation is done if the allocation is limited to a given 387 // size. Furthermore, a deallocation must not be defined for this 388 // allocation entry and a parent allocation scope must exist. 389 if (!isSmallAlloc(alloc) || dealloc || 390 !hasAllocationScope(alloc, aliases)) 391 continue; 392 393 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 394 alloc, alloc.getParentBlock(), liveness); 395 // Build a new alloca that is associated with its parent 396 // `AutomaticAllocationScope` determined during the initialization phase. 397 OpBuilder builder(startOperation); 398 Operation *allocOp = alloc.getDefiningOp(); 399 if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) { 400 Operation *alloca = 401 allocInterface.buildPromotedAlloc(builder, alloc).value(); 402 if (!alloca) 403 continue; 404 // Replace the original alloc by a newly created alloca. 405 allocOp->replaceAllUsesWith(alloca); 406 allocOp->erase(); 407 } 408 } 409 } 410 }; 411 412 //===----------------------------------------------------------------------===// 413 // BufferOptimizationPasses 414 //===----------------------------------------------------------------------===// 415 416 /// The buffer hoisting pass that hoists allocation nodes into dominating 417 /// blocks. 418 struct BufferHoistingPass 419 : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> { 420 421 void runOnOperation() override { 422 // Hoist all allocations into dominator blocks. 423 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer( 424 getOperation()); 425 optimizer.hoist(); 426 } 427 }; 428 429 /// The buffer loop hoisting pass that hoists allocation nodes out of loops. 430 struct BufferLoopHoistingPass 431 : public bufferization::impl::BufferLoopHoistingBase< 432 BufferLoopHoistingPass> { 433 434 void runOnOperation() override { 435 // Hoist all allocations out of loops. 436 hoistBuffersFromLoops(getOperation()); 437 } 438 }; 439 440 /// The promote buffer to stack pass that tries to convert alloc nodes into 441 /// alloca nodes. 442 class PromoteBuffersToStackPass 443 : public bufferization::impl::PromoteBuffersToStackBase< 444 PromoteBuffersToStackPass> { 445 public: 446 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes, 447 unsigned maxRankOfAllocatedMemRef) { 448 this->maxAllocSizeInBytes = maxAllocSizeInBytes; 449 this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef; 450 } 451 452 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) 453 : isSmallAlloc(std::move(isSmallAlloc)) {} 454 455 LogicalResult initialize(MLIRContext *context) override { 456 if (isSmallAlloc == nullptr) { 457 isSmallAlloc = [=](Value alloc) { 458 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes, 459 maxRankOfAllocatedMemRef); 460 }; 461 } 462 return success(); 463 } 464 465 void runOnOperation() override { 466 // Move all allocation nodes and convert candidates into allocas. 467 BufferPlacementPromotion optimizer(getOperation()); 468 optimizer.promote(isSmallAlloc); 469 } 470 471 private: 472 std::function<bool(Value)> isSmallAlloc; 473 }; 474 475 } // namespace 476 477 void mlir::bufferization::hoistBuffersFromLoops(Operation *op) { 478 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op); 479 optimizer.hoist(); 480 } 481 482 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() { 483 return std::make_unique<BufferHoistingPass>(); 484 } 485 486 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() { 487 return std::make_unique<BufferLoopHoistingPass>(); 488 } 489 490 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 491 unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) { 492 return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes, 493 maxRankOfAllocatedMemRef); 494 } 495 496 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 497 std::function<bool(Value)> isSmallAlloc) { 498 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc)); 499 } 500