//===- OwnershipBasedBufferDeallocation.cpp - impl. for buffer dealloc. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct `bufferization.dealloc`
// positions. Furthermore, buffer deallocation also adds required new clone
// operations to ensure that memrefs returned by functions never alias an
// argument.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Iterators.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"

namespace mlir {
namespace bufferization {
#define GEN_PASS_DEF_OWNERSHIPBASEDBUFFERDEALLOCATION
#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
} // namespace bufferization
} // namespace mlir

using namespace mlir;
using namespace mlir::bufferization;

//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//

static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
  return builder.create<arith::ConstantOp>(loc, builder.getBoolAttr(value));
}

static bool isMemref(Value v) { return isa<BaseMemRefType>(v.getType()); }

/// Return "true" if the given op is guaranteed to have neither "Allocate" nor
/// "Free" side effects.
static bool hasNeitherAllocateNorFreeSideEffect(Operation *op) {
  if (isa<MemoryEffectOpInterface>(op))
    return !hasEffect<MemoryEffects::Allocate>(op) &&
           !hasEffect<MemoryEffects::Free>(op);
  // If the op does not implement the MemoryEffectOpInterface but has
  // recursive memory effects, then this op in isolation (without its body)
  // does not have any side effects. All the ops inside the regions of this op
  // will be processed separately.
  return op->hasTrait<OpTrait::HasRecursiveMemoryEffects>();
}

/// Return "true" if the given op has buffer semantics. I.e., it has buffer
/// operands, buffer results and/or buffer region entry block arguments.
static bool hasBufferSemantics(Operation *op) {
  if (llvm::any_of(op->getOperands(), isMemref) ||
      llvm::any_of(op->getResults(), isMemref))
    return true;
  for (Region &region : op->getRegions())
    if (!region.empty())
      if (llvm::any_of(region.front().getArguments(), isMemref))
        return true;
  return false;
}

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

namespace {

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};
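
// Example usage (a minimal sketch mirroring `verifyFunctionPreconditions`
// further below): construct the analysis over a function-like op and reject
// IR that contains explicit control-flow loops:
//   Backedges backedges(funcOp);
//   if (backedges.size() != 0)
//     ...  // unsupported: explicit control-flow loop detected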

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

namespace {
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation.
class BufferDeallocation {
public:
  BufferDeallocation(Operation *op, DeallocationOptions options)
      : state(op), options(options) {}

  /// Performs the actual placement/creation of all dealloc operations.
  LogicalResult deallocate(FunctionOpInterface op);

private:
  /// The base case for the recursive template below.
  template <typename... T>
  typename std::enable_if<sizeof...(T) == 0, FailureOr<Operation *>>::type
  handleOp(Operation *op) {
    return op;
  }

  /// Applies all the handlers of the interfaces in the template list
  /// implemented by 'op'. In particular, if an operation implements more than
  /// one of the interfaces in the template list, all the associated handlers
  /// will be applied to the operation in the same order as the template list
  /// specifies. If a handler reports a failure or removes the operation
  /// without replacement (indicated by returning 'nullptr'), no further
  /// handlers are applied and the return value is propagated to the caller of
  /// 'handleOp'.
  ///
  /// The interface handlers' job is to update the deallocation state, most
  /// importantly the ownership map and the list of memrefs to potentially be
  /// deallocated per block, but also to insert `bufferization.dealloc`
  /// operations where needed. Obviously, no MemRefs that may be used at a
  /// later point in the control flow may be deallocated, and the ownership map
  /// has to be updated to reflect potential ownership changes caused by the
  /// dealloc operation (e.g., if two interfaces on the same op insert a
  /// dealloc operation each, the second one should query the ownership map and
  /// use it as the deallocation condition such that MemRefs already
  /// deallocated by the first dealloc operation are not deallocated a second
  /// time (double-free)). Note that currently only the interfaces on
  /// terminators may insert dealloc operations and it is verified as a
  /// precondition that a terminator op must implement exactly one of the
  /// interfaces handling dealloc insertion.
  ///
  /// The return value of the 'handleInterface' functions should be a
  /// FailureOr<Operation *> indicating whether there was a failure or
  /// otherwise returning the operation itself or a replacement operation.
  ///
  /// Note: The difference compared to `TypeSwitch` is that all
  /// matching cases are applied instead of just the first match.
  template <typename InterfaceT, typename... InterfacesU>
  FailureOr<Operation *> handleOp(Operation *op) {
    Operation *next = op;
    if (auto concreteOp = dyn_cast<InterfaceT>(op)) {
      FailureOr<Operation *> result = handleInterface(concreteOp);
      if (failed(result))
        return failure();
      next = *result;
    }
    if (!next)
      return FailureOr<Operation *>(nullptr);
    return handleOp<InterfacesU...>(next);
  }
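
  // Dispatch-order sketch (illustration only; these interfaces are the ones
  // listed in `handleAllInterfaces` below): for an op implementing both
  // MemoryEffectOpInterface and RegionBranchOpInterface,
  //   handleOp<MemoryEffectOpInterface, RegionBranchOpInterface>(op)
  // first applies handleInterface(MemoryEffectOpInterface) and then, on the
  // (possibly rebuilt) op it returns, handleInterface(RegionBranchOpInterface).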

  /// Apply all supported interface handlers to the given op.
  FailureOr<Operation *> handleAllInterfaces(Operation *op) {
    if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(op))
      return deallocOpInterface.process(state, options);

    if (failed(verifyOperationPreconditions(op)))
      return failure();

    return handleOp<MemoryEffectOpInterface, RegionBranchOpInterface,
                    CallOpInterface, BranchOpInterface,
                    RegionBranchTerminatorOpInterface>(op);
  }

  /// Make sure that for each forwarded MemRef value, an ownership indicator
  /// `i1` value is forwarded as well such that the successor block knows
  /// whether the MemRef has to be deallocated.
  ///
  /// Example:
  /// ```
  /// ^bb1:
  ///   <more ops...>
  ///   cf.br ^bb2(<forward-to-bb2>)
  /// ```
  /// becomes
  /// ```
  /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
  /// // let r = getMemrefsToRetain(bb1, bb2, <forward-to-bb2>)
  /// ^bb1:
  ///   <more ops...>
  ///   o = bufferization.dealloc m if c retain r
  ///   // replace ownership(r) with o element-wise
  ///   cf.br ^bb2(<forward-to-bb2>, o)
  /// ```
  FailureOr<Operation *> handleInterface(BranchOpInterface op);

  /// Add an ownership indicator for every forwarding MemRef operand and
  /// result. Nested regions never take ownership of MemRefs owned by a parent
  /// region (neither via forwarding operand nor when captured implicitly when
  /// the region is not isolated from above). Ownerships will only be passed to
  /// peer regions (when an operation has multiple regions, such as scf.while),
  /// or to parent regions.
  /// Note that the block arguments in the nested region are currently handled
  /// centrally in the 'dealloc' function, but better interface support could
  /// allow us to do this here for the nested region specifically to reduce the
  /// amount of assumptions we make on the structure of ops implementing this
  /// interface.
  ///
  /// Example:
  /// ```
  /// %ret = scf.for %i = %c0 to %c10 step %c1 iter_args(%m = %memref) {
  ///   <more ops...>
  ///   scf.yield %m : memref<2xi32>
  /// }
  /// ```
  /// becomes
  /// ```
  /// %ret:2 = scf.for %i = %c0 to %c10 step %c1
  ///     iter_args(%m = %memref, %own = %false) {
  ///   <more ops...>
  ///   // Note that the scf.yield is handled by the
  ///   // RegionBranchTerminatorOpInterface (not this handler)
  ///   // let o = getMemrefWithUniqueOwnership(%own)
  ///   scf.yield %m, o : memref<2xi32>, i1
  /// }
  /// ```
  FailureOr<Operation *> handleInterface(RegionBranchOpInterface op);

  /// If the private-function-dynamic-ownership pass option is enabled and the
  /// called function is private, additional results are added for each MemRef
  /// result to pass the dynamic ownership indicator along. Otherwise, updates
  /// the ownership map and list of memrefs to be deallocated according to the
  /// function boundary ABI, i.e., assume ownership of all returned MemRefs.
  ///
  /// Example (assume `private-function-dynamic-ownership` is enabled):
  /// ```
  /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
  /// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
  ///
  /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
  /// %ret_g = func.call @g(%memref) : (memref<2xi32>) -> memref<2xi32>
  /// ```
  /// becomes
  /// ```
  /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
  /// func.func private @g(%arg0: memref<2xi32>) -> (memref<2xi32>, i1) {...}
  ///
  /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
  /// // set ownership(%ret_f) := true
  /// // remember to deallocate %ret_f
  ///
  /// %ret_g:2 = func.call @g(%memref) : (memref<2xi32>) -> (memref<2xi32>, i1)
  /// // set ownership(%ret_g#0) := %ret_g#1
  /// // remember to deallocate %ret_g if it comes with ownership
  /// ```
  FailureOr<Operation *> handleInterface(CallOpInterface op);

  /// Takes care of allocation and free side effects. It collects all MemRefs
  /// that were allocated and have to be deallocated manually, and removes
  /// values from that list again if they are already deallocated before the
  /// end of the block. It also updates the ownership map accordingly.
  ///
  /// Example:
  /// ```
  /// %alloc = memref.alloc()
  /// %alloca = memref.alloca()
  /// ```
  /// becomes
  /// ```
  /// %alloc = memref.alloc()
  /// %alloca = memref.alloca()
  /// // set ownership(alloc) := true
  /// // set ownership(alloca) := false
  /// // remember to deallocate %alloc
  /// ```
  FailureOr<Operation *> handleInterface(MemoryEffectOpInterface op);

  /// Takes care that the function boundary ABI is adhered to if the parent
  /// operation implements FunctionOpInterface, inserting a
  /// `bufferization.clone` if necessary, and inserts the
  /// `bufferization.dealloc` operation according to the op's operands.
  ///
  /// Example:
  /// ```
  /// ^bb1:
  ///   <more ops...>
  ///   func.return <return-vals>
  /// ```
  /// becomes
  /// ```
  /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
  /// // let r = getMemrefsToRetain(bb1, nullptr, <return-vals>)
  /// ^bb1:
  ///   <more ops...>
  ///   o = bufferization.dealloc m if c retain r
  ///   func.return <return-vals>
  ///   (if !isFunctionWithoutDynamicOwnership: append o)
  /// ```
  FailureOr<Operation *> handleInterface(RegionBranchTerminatorOpInterface op);

  /// Construct a new operation which is exactly the same as the passed 'op'
  /// except that the OpResults list is appended by new results of the passed
  /// 'types'.
  /// TODO: ideally, this would be implemented using an OpInterface because it
  /// is used to append function results, loop iter_args, etc. and thus makes
  /// some assumptions that the variadic list of those is at the end of the
  /// OpResults range.
  Operation *appendOpResults(Operation *op, ArrayRef<Type> types);

  /// A convenience template for the generic 'appendOpResults' function above
  /// to avoid manual casting of the result.
  template <typename OpTy>
  OpTy appendOpResults(OpTy op, ArrayRef<Type> types) {
    return cast<OpTy>(appendOpResults(op.getOperation(), types));
  }
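
  // For illustration (a sketch; `ifOp` is a hypothetical handle to an scf.if):
  //   %0 = scf.if %c -> (memref<2xi32>) {...}
  // rebuilt via appendOpResults(ifOp, {builder.getI1Type()}) becomes
  //   %0:2 = scf.if %c -> (memref<2xi32>, i1) {...}
  // with all uses of the old result remapped to %0#0 and the old op erased.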

  /// Performs deallocation of a single basic block. This is a private function
  /// because some internal data structures have to be set up beforehand and
  /// this function has to be called on blocks in a region in dominance order.
  LogicalResult deallocate(Block *block);

  /// After all relevant interfaces of an operation have been processed by the
  /// 'handleInterface' functions, this function sets the ownership of
  /// operation results that have not been set yet by the 'handleInterface'
  /// functions. It generally assumes that each result can alias with every
  /// operand of the operation; if there are MemRef-typed results but no MemRef
  /// operands, it assigns 'false' as ownership. This happens, e.g., for the
  /// memref.get_global operation. It would also be possible to query some
  /// alias analysis to get more precise ownerships, however, the analysis
  /// would have to be updated according to the IR modifications this pass
  /// performs (e.g., re-building operations to have more result values,
  /// inserting clone operations, etc.).
  void populateRemainingOwnerships(Operation *op);

  /// Given an SSA value of MemRef type, returns the same or a new SSA value
  /// which has 'Unique' ownership where the ownership indicator is guaranteed
  /// to be always 'true'.
  Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
                                                 Value memref, Block *block);

  /// Returns whether the given operation implements FunctionOpInterface but
  /// does not use dynamic ownership on its boundary, i.e., the
  /// private-function-dynamic-ownership pass option is disabled, the function
  /// is not private, or it is just a declaration.
  bool isFunctionWithoutDynamicOwnership(Operation *op);

  /// Given an SSA value of MemRef type, this function queries the
  /// BufferDeallocationOpInterface of the defining operation of 'memref' for a
  /// materialized ownership indicator for 'memref'. If the op does not
  /// implement the interface or if the block for which the materialized value
  /// is requested does not match the block in which 'memref' is defined, the
  /// default implementation in
  /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
  std::pair<Value, Value>
  materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);

  /// Checks all the preconditions for operations implementing the
  /// FunctionOpInterface that have to hold for the deallocation to be
  /// applicable:
  /// (1) Checks that there are no explicit control-flow loops.
  static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);

  /// Checks all the preconditions for operations inside the region of
  /// operations implementing the FunctionOpInterface that have to hold for the
  /// deallocation to be applicable:
  /// (1) Checks if all operations that have at least one attached region
  /// implement the RegionBranchOpInterface. This is not required in edge
  /// cases, where we have a single attached region and the parent operation
  /// has no results.
  /// (2) Checks that no deallocations already exist. Especially deallocations
  /// in nested regions are not properly supported yet since this requires
  /// ownership of the memref to be transferred to the nested region, which
  /// does not happen by default. This constraint can be lifted in the future.
  /// (3) Checks that terminators with more than one successor except
  /// `cf.cond_br` are not present and that either BranchOpInterface or
  /// RegionBranchTerminatorOpInterface is implemented.
  static LogicalResult verifyOperationPreconditions(Operation *op);

  /// When the 'private-function-dynamic-ownership' pass option is enabled,
  /// additional `i1` return values are added for each MemRef result in the
  /// function signature. This function takes care of updating the
  /// `function_type` attribute of the function according to the actually
  /// returned values from the terminators.
  static LogicalResult updateFunctionSignature(FunctionOpInterface op);

private:
  /// Collects all analysis state including liveness, caches, ownerships of
  /// already processed values and operations, and the MemRefs that have to be
  /// deallocated at the end of each block.
  DeallocationState state;

  /// Collects all pass options in a single place.
  DeallocationOptions options;
};

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocation Implementation
//===----------------------------------------------------------------------===//

std::pair<Value, Value>
BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder,
                                               Value memref, Block *block) {
  // The interface can only materialize ownership indicators in the same block
  // as the defining op.
  if (memref.getParentBlock() != block)
    return state.getMemrefWithUniqueOwnership(builder, memref, block);

  Operation *owner = memref.getDefiningOp();
  if (!owner)
    owner = memref.getParentBlock()->getParentOp();

  // If the op implements the interface, query it for a materialized ownership
  // value.
  if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(owner))
    return deallocOpInterface.materializeUniqueOwnershipForMemref(
        state, options, builder, memref);

  // Otherwise use the default implementation.
  return state.getMemrefWithUniqueOwnership(builder, memref, block);
}

LogicalResult
BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
  // (1) Ensure that there are supported loops only (no explicit control flow
  // loops).
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  return success();
}

LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
  // We do not care about ops that do not operate on buffers and have no
  // Allocate/Free side effect.
  if (!hasBufferSemantics(op) && hasNeitherAllocateNorFreeSideEffect(op))
    return success();

  // (1) The pass does not work properly when deallocations are already
  // present. Alternatively, we could also remove all deallocations as a
  // pre-pass.
  if (isa<DeallocOp>(op))
    return op->emitError(
        "No deallocation operations must be present when running this pass!");

  // (2) Memory side effects of unregistered ops are unknown. In particular, we
  // do not know whether an unregistered op allocates memory or not.
  // - Ops with recursive memory effects are allowed. All nested ops in the
  //   regions of `op` will be analyzed separately.
  // - Call ops are allowed even though they typically do not implement the
  //   MemoryEffectOpInterface. They usually do not have side effects apart
  //   from the callee, which will be analyzed separately. (This is similar to
  //   "recursive memory effects".)
  if (!isa<MemoryEffectOpInterface>(op) &&
      !op->hasTrait<OpTrait::HasRecursiveMemoryEffects>() &&
      !isa<CallOpInterface>(op))
    return op->emitError(
        "ops with unknown memory side effects are not supported");

  // (3) Check that the control flow structures are supported.
  auto regions = op->getRegions();
  // Check that if the operation has at least one region it implements the
  // RegionBranchOpInterface. If there is an operation that does not fulfill
  // this condition, we cannot apply the deallocation steps. Furthermore, we
  // accept cases, where we have a region that returns no results, since, in
  // that case, the intra-region control flow does not affect the
  // transformation.
  size_t size = regions.size();
  if (((size == 1 && !op->getResults().empty()) || size > 1) &&
      !dyn_cast<RegionBranchOpInterface>(op)) {
    return op->emitError("All operations with attached regions need to "
                         "implement the RegionBranchOpInterface.");
  }

  // (4) Check that terminators with more than one successor except
  // `cf.cond_br` are not present and that either BranchOpInterface or
  // RegionBranchTerminatorOpInterface is implemented.
  if (op->hasTrait<OpTrait::NoTerminator>())
    return op->emitError("NoTerminator trait is not supported");

  if (op->hasTrait<OpTrait::IsTerminator>()) {
    // Either one of those interfaces has to be implemented on terminators, but
    // not both.
    if (!isa<BranchOpInterface, RegionBranchTerminatorOpInterface>(op) ||
        (isa<BranchOpInterface>(op) &&
         isa<RegionBranchTerminatorOpInterface>(op)))
      return op->emitError(
          "Terminators must implement either BranchOpInterface or "
          "RegionBranchTerminatorOpInterface (but not both)!");

    // We only support terminators with 0 or 1 successors for now and
    // special-case the conditional branch op.
    if (op->getSuccessors().size() > 1)
      return op->emitError("Terminators with more than one successor "
                           "are not supported!");
  }

  return success();
}
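
// For example (a sketch): if dynamic ownership caused the terminators of a
// private function
//   func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32>
// to additionally return an ownership indicator, the function type attribute
// is rewritten below to
//   (memref<2xi32>) -> (memref<2xi32>, i1)
// so that it matches the operands of the return operation(s).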
LogicalResult
BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
  SmallVector<TypeRange> returnOperandTypes(llvm::map_range(
      op.getFunctionBody().getOps<RegionBranchTerminatorOpInterface>(),
      [](RegionBranchTerminatorOpInterface op) {
        return op.getSuccessorOperands(RegionBranchPoint::parent()).getTypes();
      }));
  if (!llvm::all_equal(returnOperandTypes))
    return op->emitError(
        "there are multiple return operations with different operand types");

  TypeRange resultTypes = op.getResultTypes();
  // Check if we found a return operation because that doesn't necessarily
  // always have to be the case, e.g., consider a function with one block that
  // has a cf.br at the end branching to itself again (i.e., an infinite loop).
  // In that case we don't want to crash but just not update the return types.
  if (!returnOperandTypes.empty())
    resultTypes = returnOperandTypes[0];

  op.setFunctionTypeAttr(TypeAttr::get(FunctionType::get(
      op->getContext(), op.getFunctionBody().front().getArgumentTypes(),
      resultTypes)));

  return success();
}

LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
  // Stop and emit a proper error message if we don't support the input IR.
  if (failed(verifyFunctionPreconditions(op)))
    return failure();

  // Process the function block by block.
  auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
      [&](Block *block) {
        if (failed(deallocate(block)))
          return WalkResult::interrupt();
        return WalkResult::advance();
      });
  if (result.wasInterrupted())
    return failure();

  // Update the function signature if the function is private, dynamic
  // ownership is enabled, and the function has memrefs as arguments or
  // results.
  return updateFunctionSignature(op);
}

LogicalResult BufferDeallocation::deallocate(Block *block) {
  OpBuilder builder = OpBuilder::atBlockBegin(block);

  // Compute liveness transfers of ownership to this block.
  SmallVector<Value> liveMemrefs;
  state.getLiveMemrefsIn(block, liveMemrefs);
  for (auto li : liveMemrefs) {
    // Ownership of implicitly captured memrefs from other regions is never
    // taken, but ownership of memrefs in the same region (but different block)
    // is taken.
    if (li.getParentRegion() == block->getParent()) {
      state.updateOwnership(li, state.getOwnership(li, li.getParentBlock()),
                            block);
      state.addMemrefToDeallocate(li, block);
      continue;
    }

    if (li.getParentRegion()->isProperAncestor(block->getParent())) {
      Value falseVal = buildBoolValue(builder, li.getLoc(), false);
      state.updateOwnership(li, falseVal, block);
    }
  }

  for (unsigned i = 0, e = block->getNumArguments(); i < e; ++i) {
    BlockArgument arg = block->getArgument(i);
    if (!isMemref(arg))
      continue;

    // Adhere to function boundary ABI: no ownership of function argument
    // MemRefs is taken.
    if (isa<FunctionOpInterface>(block->getParentOp()) &&
        block->isEntryBlock()) {
      Value newArg = buildBoolValue(builder, arg.getLoc(), false);
      state.updateOwnership(arg, newArg);
      state.addMemrefToDeallocate(arg, block);
      continue;
    }

    // Pass MemRef ownerships along via `i1` values.
    Value newArg = block->addArgument(builder.getI1Type(), arg.getLoc());
    state.updateOwnership(arg, newArg);
    state.addMemrefToDeallocate(arg, block);
  }

  // For each operation in the block, handle the interfaces that affect
  // aliasing and ownership of memrefs.
  for (Operation &op : llvm::make_early_inc_range(*block)) {
    FailureOr<Operation *> result = handleAllInterfaces(&op);
    if (failed(result))
      return failure();
    if (!*result)
      continue;

    populateRemainingOwnerships(*result);
  }

  // TODO: if block has no terminator, handle dealloc insertion here.
  return success();
}

Operation *BufferDeallocation::appendOpResults(Operation *op,
                                               ArrayRef<Type> types) {
  SmallVector<Type> newTypes(op->getResultTypes());
  newTypes.append(types.begin(), types.end());
  auto *newOp = Operation::create(op->getLoc(), op->getName(), newTypes,
                                  op->getOperands(), op->getAttrDictionary(),
                                  op->getPropertiesStorage(),
                                  op->getSuccessors(), op->getNumRegions());
  for (auto [oldRegion, newRegion] :
       llvm::zip(op->getRegions(), newOp->getRegions()))
    newRegion.takeBody(oldRegion);

  OpBuilder(op).insert(newOp);
  op->replaceAllUsesWith(newOp->getResults().take_front(op->getNumResults()));
  op->erase();

  return newOp;
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
  OpBuilder builder = OpBuilder::atBlockBegin(op->getBlock());

  // TODO: the RegionBranchOpInterface does not provide all the necessary
  // methods to perform this transformation without additional assumptions on
  // the structure. In particular, that
  // * additional values to be passed to the next region can be added to the
  //   end of the operand list, the end of the block argument list, and the
  //   end of the result value list. However, it seems to be the general
  //   guideline for operations implementing this interface to follow this
  //   structure.
  // * and that the block arguments and result values match the forwarded
  //   operands one-to-one (i.e., that there are no other values appended to
  //   the front).
  // These assumptions are satisfied by the `scf.if`, `scf.for`, and
  // `scf.while` operations.

  SmallVector<RegionSuccessor> regions;
  op.getSuccessorRegions(RegionBranchPoint::parent(), regions);
  assert(!regions.empty() && "Must have at least one successor region");
  SmallVector<Value> entryOperands(
      op.getEntrySuccessorOperands(regions.front()));
  unsigned numMemrefOperands = llvm::count_if(entryOperands, isMemref);

  // No ownership is acquired for any MemRefs that are passed to the region
  // from the outside.
  Value falseVal = buildBoolValue(builder, op.getLoc(), false);
  op->insertOperands(op->getNumOperands(),
                     SmallVector<Value>(numMemrefOperands, falseVal));

  int counter = op->getNumResults();
  unsigned numMemrefResults = llvm::count_if(op->getResults(), isMemref);
  SmallVector<Type> ownershipResults(numMemrefResults, builder.getI1Type());
  RegionBranchOpInterface newOp = appendOpResults(op, ownershipResults);

  for (auto result : llvm::make_filter_range(newOp->getResults(), isMemref)) {
    state.updateOwnership(result, newOp->getResult(counter++));
    state.addMemrefToDeallocate(result, newOp->getBlock());
  }

  return newOp.getOperation();
}

Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
    OpBuilder &builder, Value memref, Block *block) {
  // First, make sure we at least have 'Unique' ownership already.
  std::pair<Value, Value> newMemrefAndOwnership =
      materializeUniqueOwnership(builder, memref, block);
  Value newMemref = newMemrefAndOwnership.first;
  Value condition = newMemrefAndOwnership.second;

  // Avoid inserting additional IR if ownership is already guaranteed. In
  // particular, this is already the case when we had 'Unknown' ownership
  // initially and a clone was inserted to get to 'Unique' ownership.
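  // Otherwise, the code below emits IR roughly like the following sketch,
  // where %m stands for `newMemref` and %c for `condition`:
  //   %0 = scf.if %c -> (memref<...>) {
  //     scf.yield %m : memref<...>
  //   } else {
  //     %1 = bufferization.clone %m : memref<...> to memref<...>
  //     scf.yield %1 : memref<...>
  //   }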
  if (matchPattern(condition, m_One()))
    return newMemref;

  // Insert a runtime check and only clone if we still don't have ownership at
  // runtime.
  Value maybeClone =
      builder
          .create<scf::IfOp>(
              memref.getLoc(), condition,
              [&](OpBuilder &builder, Location loc) {
                builder.create<scf::YieldOp>(loc, newMemref);
              },
              [&](OpBuilder &builder, Location loc) {
                Value clone =
                    builder.create<bufferization::CloneOp>(loc, newMemref);
                builder.create<scf::YieldOp>(loc, clone);
              })
          .getResult(0);
  Value trueVal = buildBoolValue(builder, memref.getLoc(), true);
  state.updateOwnership(maybeClone, trueVal);
  state.addMemrefToDeallocate(maybeClone, maybeClone.getParentBlock());
  return maybeClone;
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(BranchOpInterface op) {
  if (op->getNumSuccessors() > 1)
    return op->emitError("BranchOpInterface operations with multiple "
                         "successors are not supported yet");

  if (op->getNumSuccessors() != 1)
    return emitError(op.getLoc(),
                     "only BranchOpInterface operations with exactly "
                     "one successor are supported yet");

  if (op.getSuccessorOperands(0).getProducedOperandCount() > 0)
    return op.emitError("produced operands are not supported");

  // Collect the values to deallocate and retain and use them to create the
  // dealloc operation.
  Block *block = op->getBlock();
  OpBuilder builder(op);
  SmallVector<Value> memrefs, conditions, toRetain;
  if (failed(state.getMemrefsAndConditionsToDeallocate(
          builder, op.getLoc(), block, memrefs, conditions)))
    return failure();

  OperandRange forwardedOperands =
      op.getSuccessorOperands(0).getForwardedOperands();
  state.getMemrefsToRetain(block, op->getSuccessor(0), forwardedOperands,
                           toRetain);

  auto deallocOp = builder.create<bufferization::DeallocOp>(
      op.getLoc(), memrefs, conditions, toRetain);

  // We want to replace the current ownership of the retained values with the
  // result values of the dealloc operation as they are always unique.
  state.resetOwnerships(deallocOp.getRetained(), block);
  for (auto [retained, ownership] :
       llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
    state.updateOwnership(retained, ownership, block);
  }

  unsigned numAdditionalReturns = llvm::count_if(forwardedOperands, isMemref);
  SmallVector<Value> newOperands(forwardedOperands);
  auto additionalConditions =
      deallocOp.getUpdatedConditions().take_front(numAdditionalReturns);
  newOperands.append(additionalConditions.begin(), additionalConditions.end());
  op.getSuccessorOperands(0).getMutableForwardedOperands().assign(newOperands);

  return op.getOperation();
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(CallOpInterface op) {
  OpBuilder builder(op);

  // Lookup the function operation and check if it has private visibility. If
  // the function is referenced by SSA value instead of a Symbol, it's assumed
  // to be public. (And we cannot easily change the type of the SSA value
  // anyway.)
  Operation *funcOp = op.resolveCallableInTable(state.getSymbolTable());
  bool isPrivate = false;
  if (auto symbol = dyn_cast_or_null<SymbolOpInterface>(funcOp))
    isPrivate = symbol.isPrivate() && !symbol.isDeclaration();

  // If the private-function-dynamic-ownership option is enabled and we are
  // calling a private function, we need to add an additional `i1` result for
  // each MemRef result to dynamically pass the current ownership indicator
  // rather than adhering to the function boundary ABI.
  if (options.privateFuncDynamicOwnership && isPrivate) {
    unsigned numMemrefs = llvm::count_if(op->getResults(), isMemref);
    SmallVector<Type> ownershipTypesToAppend(numMemrefs, builder.getI1Type());
    unsigned ownershipCounter = op->getNumResults();
    op = appendOpResults(op, ownershipTypesToAppend);

    for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
      state.updateOwnership(result, op->getResult(ownershipCounter++));
      state.addMemrefToDeallocate(result, result.getParentBlock());
    }

    return op.getOperation();
  }

  // According to the function boundary ABI we are guaranteed to get ownership
  // of all MemRefs returned by the function. Thus we set ownership to constant
  // 'true' and remember to deallocate them.
  Value trueVal = buildBoolValue(builder, op.getLoc(), true);
  for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
    state.updateOwnership(result, trueVal);
    state.addMemrefToDeallocate(result, result.getParentBlock());
  }

  return op.getOperation();
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
  auto *block = op->getBlock();
  OpBuilder builder = OpBuilder::atBlockBegin(block);

  for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
    if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
      // The bufferization.manual_deallocation attribute can be attached to ops
      // with an allocation and/or deallocation side effect. It indicates that
      // the op is under a "manual deallocation" scheme. Deallocation ops are
      // usually forbidden in the input IR (not supported by the buffer
      // deallocation pass). However, if they are under manual deallocation,
      // they can be safely ignored by the buffer deallocation pass.
      if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
        return op->emitError(
            "memory free side-effect on MemRef value not supported!");

      // Buffers that were allocated under "manual deallocation" may be
      // manually deallocated. We insert a runtime assertion to cover certain
      // cases of invalid IR where an automatically managed buffer allocation
      // is manually deallocated. This is not a bulletproof check!
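      // The emitted check is roughly the following sketch (for an operand
      // whose ownership indicator in the current block is %own):
      //   %not_owned = arith.xori %own, %true
      //   cf.assert %not_owned, "expected that the block does not have ownership"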
      OpBuilder::InsertionGuard g(builder);
      builder.setInsertionPoint(op);
      Ownership ownership = state.getOwnership(operand, block);
      if (ownership.isUnique()) {
        Value ownershipInverted = builder.create<arith::XOrIOp>(
            op.getLoc(), ownership.getIndicator(),
            buildBoolValue(builder, op.getLoc(), true));
        builder.create<cf::AssertOp>(
            op.getLoc(), ownershipInverted,
            "expected that the block does not have ownership");
      }
    }
  }

  for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
    auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
    if (allocEffect.has_value()) {
      if (isa<SideEffects::AutomaticAllocationScopeResource>(
              allocEffect->getResource())) {
        // Make sure that the ownership of auto-managed allocations is set to
        // false. This is important for operations that have at least one
        // memref-typed operand. E.g., consider an operation like
        // `bufferization.clone` that lowers to a `memref.alloca + memref.copy`
        // instead of a `memref.alloc`. If we did not set the ownership of the
        // result here, the default ownership population in
        // `populateRemainingOwnerships` would assume aliasing with the MemRef
        // operand.
        state.resetOwnerships(res, block);
        state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
        continue;
      }

      if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
        // This allocation will be deallocated manually. Assign an ownership of
        // "false", so that it will never be deallocated by the buffer
        // deallocation pass.
        state.resetOwnerships(res, block);
        state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
        continue;
      }

      state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
      state.addMemrefToDeallocate(res, block);
    }
  }

  return op.getOperation();
}

FailureOr<Operation *>
BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
  OpBuilder builder(op);

  // If this is a return operation of a function that is not private, or if
  // dynamic function boundary ownership is disabled, we need to adhere to the
  // function boundary ABI: materialize guaranteed ownership for every returned
  // MemRef value before passing it on.
  bool funcWithoutDynamicOwnership =
      isFunctionWithoutDynamicOwnership(op->getParentOp());
  if (funcWithoutDynamicOwnership) {
    for (OpOperand &val : op->getOpOperands()) {
      if (!isMemref(val.get()))
        continue;

      val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
                                                       op->getBlock()));
    }
  }

  // TODO: getSuccessorRegions is not implemented by all operations we care
  // about, but we would need to check how many successors there are and under
  // which condition they are taken, etc.

  MutableOperandRange operands =
      op.getMutableSuccessorOperands(RegionBranchPoint::parent());

  SmallVector<Value> updatedOwnerships;
  auto result = deallocation_impl::insertDeallocOpForReturnLike(
      state, op, operands.getAsOperandRange(), updatedOwnerships);
  if (failed(result) || !*result)
    return result;

  // Add an additional operand for every MemRef for the ownership indicator.
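  // For example (a sketch): an `scf.yield %m : memref<2xi32>` whose MemRef
  // operand received the updated ownership indicator %own from the dealloc
  // insertion above becomes
  //   scf.yield %m, %own : memref<2xi32>, i1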
  if (!funcWithoutDynamicOwnership) {
    SmallVector<Value> newOperands{operands.getAsOperandRange()};
    newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
    operands.assign(newOperands);
  }

  return op.getOperation();
}

bool BufferDeallocation::isFunctionWithoutDynamicOwnership(Operation *op) {
  auto funcOp = dyn_cast<FunctionOpInterface>(op);
  return funcOp && (!options.privateFuncDynamicOwnership ||
                    !funcOp.isPrivate() || funcOp.isExternal());
}
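
// For illustration (a sketch of the default ownership population performed
// below): for an op such as
//   %view = memref.subview %src[0] [4] [1] : memref<8xi32> to memref<4xi32>
// the ownership of %view is combined with the ownership of %src (results are
// assumed to alias their MemRef operands), whereas for
//   %global = memref.get_global @g : memref<4xi32>
// (no MemRef operands) the ownership is set to a constant 'false'.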
void BufferDeallocation::populateRemainingOwnerships(Operation *op) {
  for (auto res : op->getResults()) {
    if (!isMemref(res))
      continue;
    if (!state.getOwnership(res, op->getBlock()).isUninitialized())
      continue;

    // The op does not allocate memory, otherwise, it would have been assigned
    // an ownership during `handleInterface`. Assume the result may alias with
    // any memref operand and thus combine all their ownerships.
    for (auto operand : op->getOperands()) {
      if (!isMemref(operand))
        continue;

      state.updateOwnership(
          res, state.getOwnership(operand, operand.getParentBlock()),
          op->getBlock());
    }

    // If the ownership value is still uninitialized (e.g., because the op has
    // no memref operands), assume that no ownership is taken. E.g., this is
    // the case for "memref.get_global".
    //
    // Note: This can lead to memory leaks if memory side effects are not
    // properly specified on the op.
    if (state.getOwnership(res, op->getBlock()).isUninitialized()) {
      OpBuilder builder(op);
      state.updateOwnership(res, buildBoolValue(builder, op->getLoc(), false));
    }
  }
}

//===----------------------------------------------------------------------===//
// OwnershipBasedBufferDeallocationPass
//===----------------------------------------------------------------------===//

namespace {

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct OwnershipBasedBufferDeallocationPass
    : public bufferization::impl::OwnershipBasedBufferDeallocationBase<
          OwnershipBasedBufferDeallocationPass> {
  OwnershipBasedBufferDeallocationPass() = default;
  OwnershipBasedBufferDeallocationPass(DeallocationOptions options)
      : OwnershipBasedBufferDeallocationPass() {
    this->privateFuncDynamicOwnership.setValue(
        options.privateFuncDynamicOwnership);
  }
  void runOnOperation() override {
    DeallocationOptions options;
    options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;

    auto status = getOperation()->walk([&](func::FuncOp func) {
      if (func.isExternal())
        return WalkResult::skip();

      if (failed(deallocateBuffersOwnershipBased(func, options)))
        return WalkResult::interrupt();

      return WalkResult::advance();
    });
    if (status.wasInterrupted())
      signalPassFailure();
  }
};

} // namespace

//===----------------------------------------------------------------------===//
// Implement bufferization API
//===----------------------------------------------------------------------===//

LogicalResult
bufferization::deallocateBuffersOwnershipBased(FunctionOpInterface op,
                                               DeallocationOptions options) {
  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op, options);

  // Place all required temporary clone and dealloc nodes.
  return deallocation.deallocate(op);
}

//===----------------------------------------------------------------------===//
// OwnershipBasedBufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass>
mlir::bufferization::createOwnershipBasedBufferDeallocationPass(
    DeallocationOptions options) {
  return std::make_unique<OwnershipBasedBufferDeallocationPass>(options);
}