1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements logic for three optimization passes. The first two 10 // passes try to move alloc nodes out of blocks to reduce the number of 11 // allocations and copies during buffer deallocation. The third pass tries to 12 // convert heap-based allocations to stack-based allocations, if possible. 13 14 #include "PassDetail.h" 15 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" 16 #include "mlir/Dialect/Bufferization/Transforms/Passes.h" 17 #include "mlir/Dialect/MemRef/IR/MemRef.h" 18 #include "mlir/IR/Operation.h" 19 #include "mlir/Interfaces/LoopLikeInterface.h" 20 #include "mlir/Pass/Pass.h" 21 22 using namespace mlir; 23 using namespace mlir::bufferization; 24 25 /// Returns true if the given operation implements a known high-level region- 26 /// based control-flow interface. 27 static bool isKnownControlFlowInterface(Operation *op) { 28 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op); 29 } 30 31 /// Check if the size of the allocation is less than the given size. The 32 /// transformation is only applied to small buffers since large buffers could 33 /// exceed the stack space. 34 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, 35 unsigned maxRankOfAllocatedMemRef) { 36 auto type = alloc.getType().dyn_cast<ShapedType>(); 37 if (!type || !alloc.getDefiningOp<memref::AllocOp>()) 38 return false; 39 if (!type.hasStaticShape()) { 40 // Check if the dynamic shape dimension of the alloc is produced by 41 // `memref.rank`. If this is the case, it is likely to be small. 42 // Furthermore, the dimension is limited to the maximum rank of the 43 // allocated memref to avoid large values by multiplying several small 44 // values. 45 if (type.getRank() <= maxRankOfAllocatedMemRef) { 46 return llvm::all_of(alloc.getDefiningOp()->getOperands(), 47 [&](Value operand) { 48 return operand.getDefiningOp<memref::RankOp>(); 49 }); 50 } 51 return false; 52 } 53 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp()) 54 .getTypeSizeInBits(type.getElementType()); 55 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8; 56 } 57 58 /// Checks whether the given aliases leave the allocation scope. 59 static bool 60 leavesAllocationScope(Region *parentRegion, 61 const BufferViewFlowAnalysis::ValueSetT &aliases) { 62 for (Value alias : aliases) { 63 for (auto *use : alias.getUsers()) { 64 // If there is at least one alias that leaves the parent region, we know 65 // that this alias escapes the whole region and hence the associated 66 // allocation leaves allocation scope. 67 if (isRegionReturnLike(use) && use->getParentRegion() == parentRegion) 68 return true; 69 } 70 } 71 return false; 72 } 73 74 /// Checks, if an automated allocation scope for a given alloc value exists. 75 static bool hasAllocationScope(Value alloc, 76 const BufferViewFlowAnalysis &aliasAnalysis) { 77 Region *region = alloc.getParentRegion(); 78 do { 79 if (Operation *parentOp = region->getParentOp()) { 80 // Check if the operation is an automatic allocation scope and whether an 81 // alias leaves the scope. This means, an allocation yields out of 82 // this scope and can not be transformed in a stack-based allocation. 83 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() && 84 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc))) 85 return true; 86 // Check if the operation is a known control flow interface and break the 87 // loop to avoid transformation in loops. Furthermore skip transformation 88 // if the operation does not implement a RegionBeanchOpInterface. 89 if (BufferPlacementTransformationBase::isLoop(parentOp) || 90 !isKnownControlFlowInterface(parentOp)) 91 break; 92 } 93 } while ((region = region->getParentRegion())); 94 return false; 95 } 96 97 namespace { 98 99 //===----------------------------------------------------------------------===// 100 // BufferAllocationHoisting 101 //===----------------------------------------------------------------------===// 102 103 /// A base implementation compatible with the `BufferAllocationHoisting` class. 104 struct BufferAllocationHoistingStateBase { 105 /// A pointer to the current dominance info. 106 DominanceInfo *dominators; 107 108 /// The current allocation value. 109 Value allocValue; 110 111 /// The current placement block (if any). 112 Block *placementBlock; 113 114 /// Initializes the state base. 115 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue, 116 Block *placementBlock) 117 : dominators(dominators), allocValue(allocValue), 118 placementBlock(placementBlock) {} 119 }; 120 121 /// Implements the actual hoisting logic for allocation nodes. 122 template <typename StateT> 123 class BufferAllocationHoisting : public BufferPlacementTransformationBase { 124 public: 125 BufferAllocationHoisting(Operation *op) 126 : BufferPlacementTransformationBase(op), dominators(op), 127 postDominators(op), scopeOp(op) {} 128 129 /// Moves allocations upwards. 130 void hoist() { 131 SmallVector<Value> allocsAndAllocas; 132 for (BufferPlacementAllocs::AllocEntry &entry : allocs) 133 allocsAndAllocas.push_back(std::get<0>(entry)); 134 scopeOp->walk([&](memref::AllocaOp op) { 135 allocsAndAllocas.push_back(op.getMemref()); 136 }); 137 138 for (auto allocValue : allocsAndAllocas) { 139 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp())) 140 continue; 141 Operation *definingOp = allocValue.getDefiningOp(); 142 assert(definingOp && "No defining op"); 143 auto operands = definingOp->getOperands(); 144 auto resultAliases = aliases.resolve(allocValue); 145 // Determine the common dominator block of all aliases. 146 Block *dominatorBlock = 147 findCommonDominator(allocValue, resultAliases, dominators); 148 // Init the initial hoisting state. 149 StateT state(&dominators, allocValue, allocValue.getParentBlock()); 150 // Check for additional allocation dependencies to compute an upper bound 151 // for hoisting. 152 Block *dependencyBlock = nullptr; 153 // If this node has dependencies, check all dependent nodes. This ensures 154 // that all dependency values have been computed before allocating the 155 // buffer. 156 for (Value depValue : operands) { 157 Block *depBlock = depValue.getParentBlock(); 158 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock)) 159 dependencyBlock = depBlock; 160 } 161 162 // Find the actual placement block and determine the start operation using 163 // an upper placement-block boundary. The idea is that placement block 164 // cannot be moved any further upwards than the given upper bound. 165 Block *placementBlock = findPlacementBlock( 166 state, state.computeUpperBound(dominatorBlock, dependencyBlock)); 167 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 168 allocValue, placementBlock, liveness); 169 170 // Move the alloc in front of the start operation. 171 Operation *allocOperation = allocValue.getDefiningOp(); 172 allocOperation->moveBefore(startOperation); 173 } 174 } 175 176 private: 177 /// Finds a valid placement block by walking upwards in the CFG until we 178 /// either cannot continue our walk due to constraints (given by the StateT 179 /// implementation) or we have reached the upper-most dominator block. 180 Block *findPlacementBlock(StateT &state, Block *upperBound) { 181 Block *currentBlock = state.placementBlock; 182 // Walk from the innermost regions/loops to the outermost regions/loops and 183 // find an appropriate placement block that satisfies the constraint of the 184 // current StateT implementation. Walk until we reach the upperBound block 185 // (if any). 186 187 // If we are not able to find a valid parent operation or an associated 188 // parent block, break the walk loop. 189 Operation *parentOp; 190 Block *parentBlock; 191 while ((parentOp = currentBlock->getParentOp()) && 192 (parentBlock = parentOp->getBlock()) && 193 (!upperBound || 194 dominators.properlyDominates(upperBound, currentBlock))) { 195 // Try to find an immediate dominator and check whether the parent block 196 // is above the immediate dominator (if any). 197 DominanceInfoNode *idom = nullptr; 198 199 // DominanceInfo doesn't support getNode queries for single-block regions. 200 if (!currentBlock->isEntryBlock()) 201 idom = dominators.getNode(currentBlock)->getIDom(); 202 203 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) { 204 // If the current immediate dominator is below the placement block, move 205 // to the immediate dominator block. 206 currentBlock = idom->getBlock(); 207 state.recordMoveToDominator(currentBlock); 208 } else { 209 // We have to move to our parent block since an immediate dominator does 210 // either not exist or is above our parent block. If we cannot move to 211 // our parent operation due to constraints given by the StateT 212 // implementation, break the walk loop. Furthermore, we should not move 213 // allocations out of unknown region-based control-flow operations. 214 if (!isKnownControlFlowInterface(parentOp) || 215 !state.isLegalPlacement(parentOp)) 216 break; 217 // Move to our parent block by notifying the current StateT 218 // implementation. 219 currentBlock = parentBlock; 220 state.recordMoveToParent(currentBlock); 221 } 222 } 223 // Return the finally determined placement block. 224 return state.placementBlock; 225 } 226 227 /// The dominator info to find the appropriate start operation to move the 228 /// allocs. 229 DominanceInfo dominators; 230 231 /// The post dominator info to move the dependent allocs in the right 232 /// position. 233 PostDominanceInfo postDominators; 234 235 /// The map storing the final placement blocks of a given alloc value. 236 llvm::DenseMap<Value, Block *> placementBlocks; 237 238 /// The operation that this transformation is working on. It is used to also 239 /// gather allocas. 240 Operation *scopeOp; 241 }; 242 243 /// A state implementation compatible with the `BufferAllocationHoisting` class 244 /// that hoists allocations into dominator blocks while keeping them inside of 245 /// loops. 246 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase { 247 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 248 249 /// Computes the upper bound for the placement block search. 250 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 251 // If we do not have a dependency block, the upper bound is given by the 252 // dominator block. 253 if (!dependencyBlock) 254 return dominatorBlock; 255 256 // Find the "lower" block of the dominator and the dependency block to 257 // ensure that we do not move allocations above this block. 258 return dominators->properlyDominates(dominatorBlock, dependencyBlock) 259 ? dependencyBlock 260 : dominatorBlock; 261 } 262 263 /// Returns true if the given operation does not represent a loop. 264 bool isLegalPlacement(Operation *op) { 265 return !BufferPlacementTransformationBase::isLoop(op); 266 } 267 268 /// Returns true if the given operation should be considered for hoisting. 269 static bool shouldHoistOpType(Operation *op) { 270 return llvm::isa<memref::AllocOp>(op); 271 } 272 273 /// Sets the current placement block to the given block. 274 void recordMoveToDominator(Block *block) { placementBlock = block; } 275 276 /// Sets the current placement block to the given block. 277 void recordMoveToParent(Block *block) { recordMoveToDominator(block); } 278 }; 279 280 /// A state implementation compatible with the `BufferAllocationHoisting` class 281 /// that hoists allocations out of loops. 282 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { 283 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 284 285 /// Remembers the dominator block of all aliases. 286 Block *aliasDominatorBlock = nullptr; 287 288 /// Computes the upper bound for the placement block search. 289 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 290 aliasDominatorBlock = dominatorBlock; 291 // If there is a dependency block, we have to use this block as an upper 292 // bound to satisfy all allocation value dependencies. 293 return dependencyBlock ? dependencyBlock : nullptr; 294 } 295 296 /// Returns true if the given operation represents a loop and one of the 297 /// aliases caused the `aliasDominatorBlock` to be "above" the block of the 298 /// given loop operation. If this is the case, it indicates that the 299 /// allocation is passed via a back edge. 300 bool isLegalPlacement(Operation *op) { 301 return BufferPlacementTransformationBase::isLoop(op) && 302 !dominators->dominates(aliasDominatorBlock, op->getBlock()); 303 } 304 305 /// Returns true if the given operation should be considered for hoisting. 306 static bool shouldHoistOpType(Operation *op) { 307 return llvm::isa<memref::AllocOp, memref::AllocaOp>(op); 308 } 309 310 /// Does not change the internal placement block, as we want to move 311 /// operations out of loops only. 312 void recordMoveToDominator(Block *block) {} 313 314 /// Sets the current placement block to the given block. 315 void recordMoveToParent(Block *block) { placementBlock = block; } 316 }; 317 318 //===----------------------------------------------------------------------===// 319 // BufferPlacementPromotion 320 //===----------------------------------------------------------------------===// 321 322 /// Promotes heap-based allocations to stack-based allocations (if possible). 323 class BufferPlacementPromotion : BufferPlacementTransformationBase { 324 public: 325 BufferPlacementPromotion(Operation *op) 326 : BufferPlacementTransformationBase(op) {} 327 328 /// Promote buffers to stack-based allocations. 329 void promote(function_ref<bool(Value)> isSmallAlloc) { 330 for (BufferPlacementAllocs::AllocEntry &entry : allocs) { 331 Value alloc = std::get<0>(entry); 332 Operation *dealloc = std::get<1>(entry); 333 // Checking several requirements to transform an AllocOp into an AllocaOp. 334 // The transformation is done if the allocation is limited to a given 335 // size. Furthermore, a deallocation must not be defined for this 336 // allocation entry and a parent allocation scope must exist. 337 if (!isSmallAlloc(alloc) || dealloc || 338 !hasAllocationScope(alloc, aliases)) 339 continue; 340 341 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 342 alloc, alloc.getParentBlock(), liveness); 343 // Build a new alloca that is associated with its parent 344 // `AutomaticAllocationScope` determined during the initialization phase. 345 OpBuilder builder(startOperation); 346 Operation *allocOp = alloc.getDefiningOp(); 347 Operation *alloca = builder.create<memref::AllocaOp>( 348 alloc.getLoc(), alloc.getType().cast<MemRefType>(), 349 allocOp->getOperands()); 350 351 // Replace the original alloc by a newly created alloca. 352 allocOp->replaceAllUsesWith(alloca); 353 allocOp->erase(); 354 } 355 } 356 }; 357 358 //===----------------------------------------------------------------------===// 359 // BufferOptimizationPasses 360 //===----------------------------------------------------------------------===// 361 362 /// The buffer hoisting pass that hoists allocation nodes into dominating 363 /// blocks. 364 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> { 365 366 void runOnOperation() override { 367 // Hoist all allocations into dominator blocks. 368 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer( 369 getOperation()); 370 optimizer.hoist(); 371 } 372 }; 373 374 /// The buffer loop hoisting pass that hoists allocation nodes out of loops. 375 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> { 376 377 void runOnOperation() override { 378 // Hoist all allocations out of loops. 379 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer( 380 getOperation()); 381 optimizer.hoist(); 382 } 383 }; 384 385 /// The promote buffer to stack pass that tries to convert alloc nodes into 386 /// alloca nodes. 387 class PromoteBuffersToStackPass 388 : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> { 389 public: 390 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes, 391 unsigned maxRankOfAllocatedMemRef) { 392 this->maxAllocSizeInBytes = maxAllocSizeInBytes; 393 this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef; 394 } 395 396 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) 397 : isSmallAlloc(std::move(isSmallAlloc)) {} 398 399 LogicalResult initialize(MLIRContext *context) override { 400 if (isSmallAlloc == nullptr) { 401 isSmallAlloc = [=](Value alloc) { 402 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes, 403 maxRankOfAllocatedMemRef); 404 }; 405 } 406 return success(); 407 } 408 409 void runOnOperation() override { 410 // Move all allocation nodes and convert candidates into allocas. 411 BufferPlacementPromotion optimizer(getOperation()); 412 optimizer.promote(isSmallAlloc); 413 } 414 415 private: 416 std::function<bool(Value)> isSmallAlloc; 417 }; 418 419 } // namespace 420 421 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() { 422 return std::make_unique<BufferHoistingPass>(); 423 } 424 425 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() { 426 return std::make_unique<BufferLoopHoistingPass>(); 427 } 428 429 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 430 unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) { 431 return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes, 432 maxRankOfAllocatedMemRef); 433 } 434 435 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 436 std::function<bool(Value)> isSmallAlloc) { 437 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc)); 438 } 439