//===- MemoryUtils.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Transforms/MemoryUtils.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/Dominance.h"
#include "llvm/ADT/STLExtras.h"

namespace {
/// Helper class to detect if an alloca is inside an mlir::Block that can be
/// reached again before its deallocation points via block successors. This
/// analysis is only valid if the deallocation points are inside (or nested
/// inside) the same region as alloca because it does not consider region CFG
/// (for instance, the block inside a fir.do_loop is obviously inside a loop,
/// but is not a loop formed by blocks). The dominance of the alloca on its
/// deallocation points implies this pre-condition (although it is more
/// restrictive).
class BlockCycleDetector {
public:
  /// Return true if the block owning \p alloca may be reached again, via
  /// block successors, before hitting any of \p deallocationPoints.
  bool allocaIsInCycle(fir::AllocaOp alloca,
                       llvm::ArrayRef<mlir::Operation *> deallocationPoints);

private:
  // Cache for blocks owning alloca that have been analyzed. In many Fortran
  // programs, allocas are usually made in the same blocks with no block
  // cycles. So getting a fast "no" is beneficial.
  llvm::DenseMap<mlir::Block *, /*isInCycle*/ bool> analyzed;
};
} // namespace

namespace {
/// Implementation helper that, given user callbacks to create a replacement
/// storage and to generate deallocations, rewrites a fir.alloca and inserts
/// the deallocations at the exit points of the region owning the alloca.
class AllocaReplaceImpl {
public:
  AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter,
                    fir::DeallocCallBack deallocGenerator)
      : allocaRewriter{allocaRewriter}, deallocGenerator{deallocGenerator} {}
  /// Attempt to replace \p alloca. Returns false if no suitable deallocation
  /// points/owning region could be identified, or if the allocaRewriter
  /// callback returned a null replacement value.
  bool replace(mlir::RewriterBase &, fir::AllocaOp);

private:
  /// Compute the deallocation points for \p alloca and return the region that
  /// owns it (nullptr if the alloca cannot be reasoned about).
  mlir::Region *findDeallocationPointsAndOwner(
      fir::AllocaOp alloca,
      llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
  /// Return true if \p alloca properly dominates all \p deallocationPoints.
  bool
  allocDominatesDealloc(fir::AllocaOp alloca,
                        llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
    return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
      return this->dominanceInfo.properlyDominates(alloca.getOperation(),
                                                   deallocPoint);
    });
  }
  /// Generate deallocation code that tests, at runtime, whether the storage
  /// was allocated before deallocating it. Used when the alloca does not
  /// dominate all its deallocation points (or may be re-executed in a block
  /// loop before being deallocated).
  void
  genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
                          llvm::ArrayRef<mlir::Operation *> deallocationPoints,
                          mlir::Value replacement, mlir::Region &owningRegion);

private:
  fir::AllocaRewriterCallBack allocaRewriter;
  fir::DeallocCallBack deallocGenerator;
  mlir::DominanceInfo dominanceInfo;
  BlockCycleDetector blockCycleDetector;
};
} // namespace

/// Walk the block successor graph from \p allocaBlock, stopping at the blocks
/// holding \p deallocationPoints, and report whether \p allocaBlock can be
/// reached again (i.e., whether it sits inside a block cycle that does not go
/// through a deallocation point first).
static bool
allocaIsInCycleImpl(mlir::Block *allocaBlock,
                    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  llvm::DenseSet<mlir::Block *> seen;
  // Insert the deallocation point blocks as "seen" so that the block
  // traversal will stop at them.
  for (mlir::Operation *deallocPoint : deallocationPoints)
    seen.insert(deallocPoint->getBlock());
  if (seen.contains(allocaBlock))
    return false;
  // Traverse the block successor graph starting by the alloca block.
  llvm::SmallVector<mlir::Block *> successors{allocaBlock};
  while (!successors.empty())
    for (mlir::Block *next : successors.pop_back_val()->getSuccessors()) {
      if (next == allocaBlock)
        return true;
      if (auto pair = seen.insert(next); pair.second)
        successors.push_back(next);
    }
  // The traversal did not reach the alloca block again.
  return false;
}

bool BlockCycleDetector::allocaIsInCycle(
    fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  mlir::Block *allocaBlock = alloca->getBlock();
  auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
  bool alreadyAnalyzed = !analyzedPair.second;
  bool &isInCycle = analyzedPair.first->second;
  // Fast exit if block was already analyzed and no cycle was found.
  if (alreadyAnalyzed && !isInCycle)
    return false;
  // If the analysis was not done generically for this block, run it and
  // save the result.
  if (!alreadyAnalyzed)
    isInCycle = allocaIsInCycleImpl(allocaBlock, /*deallocationPoints*/ {});
  if (!isInCycle)
    return false;
  // If the generic analysis found a block loop, see if the deallocation
  // point would be reached before reaching the block again. Do not
  // cache that analysis that is specific to the deallocation points
  // found for this alloca.
  return allocaIsInCycleImpl(allocaBlock, deallocationPoints);
}

/// Does \p terminator yield any value that conforms with a pass-by-reference
/// (memory) type? Used to detect recipes returning local memory addresses.
static bool terminatorYieldsMemory(mlir::Operation &terminator) {
  return llvm::any_of(terminator.getResults(), [](mlir::OpResult res) {
    return fir::conformsWithPassByRef(res.getType());
  });
}

/// Is \p terminator an exit point of its region (no block successors)?
static bool isRegionTerminator(mlir::Operation &terminator) {
  // Using ReturnLike trait is tempting but it is not set on
  // all region terminators that matter (like omp::TerminatorOp that
  // has no results).
  // May be true for dead code. It is not a correctness issue and dead code can
  // be eliminated by running region simplification before this utility is
  // used.
  // May also be true for unreachable like terminators (e.g., after an abort
  // call related to Fortran STOP). This is also OK, the inserted deallocation
  // will simply never be reached. It is easier for the rest of the code here
  // to assume there is always at least one deallocation point, so keep
  // unreachable terminators.
  return !terminator.hasSuccessors();
}

mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
    fir::AllocaOp alloca,
    llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
  // Step 1: Identify the operation and region owning the alloca.
  mlir::Region *owningRegion = alloca.getOwnerRegion();
  if (!owningRegion)
    return nullptr;
  mlir::Operation *owningOp = owningRegion->getParentOp();
  assert(owningOp && "region expected to be owned");
  // Step 2: Identify the exit points of the owning region, they are the default
  // deallocation points. TODO: detect and use lifetime markers to get earlier
  // deallocation points.
  bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
  for (mlir::Block &block : owningRegion->getBlocks())
    if (mlir::Operation *terminator = block.getTerminator();
        isRegionTerminator(*terminator)) {
      // FIXME: OpenACC and OpenMP privatization recipe are stand alone
      // operation meant to be later "inlined", the value they return may
      // be the address of a local alloca. It would be incorrect to insert
      // deallocation before the terminator (this would introduce use after
      // free once the recipe is inlined).
      // This probably requires redesign or special handling on the OpenACC/MP
      // side.
      if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
        return nullptr;
      deallocationPoints.push_back(terminator);
    }
  // If no block terminators without successors have been found, this is
  // an odd region we cannot reason about (never seen yet in FIR and
  // mainstream dialects, but MLIR does not really prevent it).
  if (deallocationPoints.empty())
    return nullptr;

  // Step 3: detect block based loops between the allocation and deallocation
  // points, and add a deallocation point on the back edge to avoid memory
  // leaks.
  // The detection avoids doing region CFG analysis by assuming that there may
  // be cycles if deallocation points are not dominated by the alloca.
  // This leaves the cases where the deallocation points are in the same region
  // as the alloca (or nested inside it). In which cases there may be a back
  // edge between the alloca and the deallocation point via block successors. An
  // analysis is run to detect those cases.
  // When a loop is detected, the easiest solution to deallocate on the back
  // edge is to store the allocated memory address in a variable (that dominates
  // the loops) and to deallocate the address in that variable if it is set
  // before executing the allocation. This strategy still leads to correct
  // execution in the "false positive" cases.
  // Hence, the alloca is added as a deallocation point when there is no
  // dominance. Note that bringing lifetime markers above will reduce the
  // false positives.
  if (!allocDominatesDealloc(alloca, deallocationPoints) ||
      blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
    deallocationPoints.push_back(alloca.getOperation());
  return owningRegion;
}

void AllocaReplaceImpl::genIndirectDeallocation(
    mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints,
    mlir::Value replacement, mlir::Region &owningRegion) {
  mlir::Location loc = alloca.getLoc();
  auto replacementInsertPoint = rewriter.saveInsertionPoint();
  // Create C pointer variable in the entry block to store the alloc
  // and access it indirectly in the entry points that do not dominate.
  rewriter.setInsertionPointToStart(&owningRegion.front());
  mlir::Type heapType = fir::HeapType::get(alloca.getInType());
  mlir::Value ptrVar = rewriter.create<fir::AllocaOp>(loc, heapType);
  mlir::Value nullPtr = rewriter.create<fir::ZeroOp>(loc, heapType);
  rewriter.create<fir::StoreOp>(loc, nullPtr, ptrVar);
  // TODO: introducing a pointer compare op in FIR would help
  // generating less IR here.
  mlir::Type intPtrTy = fir::getIntPtrType(rewriter);
  mlir::Value c0 = rewriter.create<mlir::arith::ConstantOp>(
      loc, intPtrTy, rewriter.getIntegerAttr(intPtrTy, 0));

  // Store new storage address right after its creation.
  rewriter.restoreInsertionPoint(replacementInsertPoint);
  mlir::Value castReplacement =
      fir::factory::createConvert(rewriter, loc, heapType, replacement);
  rewriter.create<fir::StoreOp>(loc, castReplacement, ptrVar);

  // Generate conditional deallocation at every deallocation point.
  // The address is loaded from the pointer variable and deallocated only
  // if it is non-null (i.e., the allocation was actually executed on the
  // path reaching this point).
  auto genConditionalDealloc = [&](mlir::Location loc) {
    mlir::Value ptrVal = rewriter.create<fir::LoadOp>(loc, ptrVar);
    mlir::Value ptrToInt =
        rewriter.create<fir::ConvertOp>(loc, intPtrTy, ptrVal);
    mlir::Value isAllocated = rewriter.create<mlir::arith::CmpIOp>(
        loc, mlir::arith::CmpIPredicate::ne, ptrToInt, c0);
    auto ifOp = rewriter.create<fir::IfOp>(loc, std::nullopt, isAllocated,
                                           /*withElseRegion=*/false);
    rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
    mlir::Value cast = fir::factory::createConvert(
        rewriter, loc, replacement.getType(), ptrVal);
    deallocGenerator(loc, rewriter, cast);
    // Currently there is no need to reset the pointer var because two
    // deallocation points can never be reached without going through the
    // alloca.
    rewriter.setInsertionPointAfter(ifOp);
  };
  for (mlir::Operation *deallocPoint : deallocationPoints) {
    rewriter.setInsertionPoint(deallocPoint);
    genConditionalDealloc(deallocPoint->getLoc());
  }
}

bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
                                fir::AllocaOp alloca) {
  llvm::SmallVector<mlir::Operation *> deallocationPoints;
  mlir::Region *owningRegion =
      findDeallocationPointsAndOwner(alloca, deallocationPoints);
  if (!owningRegion)
    return false;
  rewriter.setInsertionPointAfter(alloca.getOperation());
  bool deallocPointsDominateAlloc =
      allocDominatesDealloc(alloca, deallocationPoints);
  if (mlir::Value replacement =
          allocaRewriter(rewriter, alloca, deallocPointsDominateAlloc)) {
    mlir::Value castReplacement = fir::factory::createConvert(
        rewriter, alloca.getLoc(), alloca.getType(), replacement);
    if (deallocPointsDominateAlloc)
      // Simple case: the alloca dominates all deallocation points, so a
      // direct deallocation can be generated at each of them.
      for (mlir::Operation *deallocPoint : deallocationPoints) {
        rewriter.setInsertionPoint(deallocPoint);
        deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
      }
    else
      // Otherwise, deallocation must be guarded by a runtime check of a
      // pointer variable tracking whether the allocation was executed.
      genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
                              *owningRegion);
    rewriter.replaceOp(alloca, castReplacement);
  }
  return true;
}

bool fir::replaceAllocas(mlir::RewriterBase &rewriter,
                         mlir::Operation *parentOp,
                         MustRewriteCallBack mustReplace,
                         AllocaRewriterCallBack allocaRewriter,
                         DeallocCallBack deallocGenerator) {
  // If the parent operation is not an alloca owner, the code below would risk
  // modifying IR outside of parentOp.
  if (!fir::AllocaOp::ownsNestedAlloca(parentOp))
    return false;
  auto insertPoint = rewriter.saveInsertionPoint();
  bool replacedAllRequestedAlloca = true;
  AllocaReplaceImpl impl(allocaRewriter, deallocGenerator);
  // Walk all nested allocas; only those selected by the mustReplace callback
  // are rewritten. The result is true only if every requested replacement
  // succeeded.
  parentOp->walk([&](fir::AllocaOp alloca) {
    if (mustReplace(alloca))
      replacedAllRequestedAlloca &= impl.replace(rewriter, alloca);
  });
  rewriter.restoreInsertionPoint(insertPoint);
  return replacedAllRequestedAlloca;
}