xref: /llvm-project/flang/lib/Optimizer/Transforms/MemoryUtils.cpp (revision 31087c5e4c8ddfe08ab3ea6d3847e05c4738eeee)
1*31087c5eSjeanPerier //===- MemoryUtils.cpp ----------------------------------------------------===//
2*31087c5eSjeanPerier //
3*31087c5eSjeanPerier // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*31087c5eSjeanPerier // See https://llvm.org/LICENSE.txt for license information.
5*31087c5eSjeanPerier // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*31087c5eSjeanPerier //
7*31087c5eSjeanPerier //===----------------------------------------------------------------------===//
8*31087c5eSjeanPerier 
9*31087c5eSjeanPerier #include "flang/Optimizer/Transforms/MemoryUtils.h"
10*31087c5eSjeanPerier #include "flang/Optimizer/Builder/FIRBuilder.h"
11*31087c5eSjeanPerier #include "flang/Optimizer/Builder/Todo.h"
12*31087c5eSjeanPerier #include "mlir/Dialect/OpenACC/OpenACC.h"
13*31087c5eSjeanPerier #include "mlir/IR/Dominance.h"
14*31087c5eSjeanPerier #include "llvm/ADT/STLExtras.h"
15*31087c5eSjeanPerier 
16*31087c5eSjeanPerier namespace {
17*31087c5eSjeanPerier /// Helper class to detect if an alloca is inside an mlir::Block that can be
18*31087c5eSjeanPerier /// reached again before its deallocation points via block successors. This
19*31087c5eSjeanPerier /// analysis is only valid if the deallocation points are inside (or nested
20*31087c5eSjeanPerier /// inside) the same region as alloca because it does not consider region CFG
21*31087c5eSjeanPerier /// (for instance, the block inside a fir.do_loop is obviously inside a loop,
22*31087c5eSjeanPerier /// but is not a loop formed by blocks). The dominance of the alloca on its
23*31087c5eSjeanPerier /// deallocation points implies this pre-condition (although it is more
24*31087c5eSjeanPerier /// restrictive).
25*31087c5eSjeanPerier class BlockCycleDetector {
26*31087c5eSjeanPerier public:
27*31087c5eSjeanPerier   bool allocaIsInCycle(fir::AllocaOp alloca,
28*31087c5eSjeanPerier                        llvm::ArrayRef<mlir::Operation *> deallocationPoints);
29*31087c5eSjeanPerier 
30*31087c5eSjeanPerier private:
31*31087c5eSjeanPerier   // Cache for blocks owning alloca that have been analyzed. In many Fortran
32*31087c5eSjeanPerier   // programs, allocas are usually made in the same blocks with no block cycles.
33*31087c5eSjeanPerier   // So getting a fast "no" is beneficial.
34*31087c5eSjeanPerier   llvm::DenseMap<mlir::Block *, /*isInCycle*/ bool> analyzed;
35*31087c5eSjeanPerier };
36*31087c5eSjeanPerier } // namespace
37*31087c5eSjeanPerier 
38*31087c5eSjeanPerier namespace {
39*31087c5eSjeanPerier class AllocaReplaceImpl {
40*31087c5eSjeanPerier public:
41*31087c5eSjeanPerier   AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter,
42*31087c5eSjeanPerier                     fir::DeallocCallBack deallocGenerator)
43*31087c5eSjeanPerier       : allocaRewriter{allocaRewriter}, deallocGenerator{deallocGenerator} {}
44*31087c5eSjeanPerier   bool replace(mlir::RewriterBase &, fir::AllocaOp);
45*31087c5eSjeanPerier 
46*31087c5eSjeanPerier private:
47*31087c5eSjeanPerier   mlir::Region *findDeallocationPointsAndOwner(
48*31087c5eSjeanPerier       fir::AllocaOp alloca,
49*31087c5eSjeanPerier       llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
50*31087c5eSjeanPerier   bool
51*31087c5eSjeanPerier   allocDominatesDealloc(fir::AllocaOp alloca,
52*31087c5eSjeanPerier                         llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
53*31087c5eSjeanPerier     return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
54*31087c5eSjeanPerier       return this->dominanceInfo.properlyDominates(alloca.getOperation(),
55*31087c5eSjeanPerier                                                    deallocPoint);
56*31087c5eSjeanPerier     });
57*31087c5eSjeanPerier   }
58*31087c5eSjeanPerier   void
59*31087c5eSjeanPerier   genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
60*31087c5eSjeanPerier                           llvm::ArrayRef<mlir::Operation *> deallocationPoints,
61*31087c5eSjeanPerier                           mlir::Value replacement, mlir::Region &owningRegion);
62*31087c5eSjeanPerier 
63*31087c5eSjeanPerier private:
64*31087c5eSjeanPerier   fir::AllocaRewriterCallBack allocaRewriter;
65*31087c5eSjeanPerier   fir::DeallocCallBack deallocGenerator;
66*31087c5eSjeanPerier   mlir::DominanceInfo dominanceInfo;
67*31087c5eSjeanPerier   BlockCycleDetector blockCycleDetector;
68*31087c5eSjeanPerier };
69*31087c5eSjeanPerier } // namespace
70*31087c5eSjeanPerier 
71*31087c5eSjeanPerier static bool
72*31087c5eSjeanPerier allocaIsInCycleImpl(mlir::Block *allocaBlock,
73*31087c5eSjeanPerier                     llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
74*31087c5eSjeanPerier   llvm::DenseSet<mlir::Block *> seen;
75*31087c5eSjeanPerier   // Insert the deallocation point blocks as "seen" so that the block
76*31087c5eSjeanPerier   // traversal will stop at them.
77*31087c5eSjeanPerier   for (mlir::Operation *deallocPoint : deallocationPoints)
78*31087c5eSjeanPerier     seen.insert(deallocPoint->getBlock());
79*31087c5eSjeanPerier   if (seen.contains(allocaBlock))
80*31087c5eSjeanPerier     return false;
81*31087c5eSjeanPerier   // Traverse the block successor graph starting by the alloca block.
82*31087c5eSjeanPerier   llvm::SmallVector<mlir::Block *> successors{allocaBlock};
83*31087c5eSjeanPerier   while (!successors.empty())
84*31087c5eSjeanPerier     for (mlir::Block *next : successors.pop_back_val()->getSuccessors()) {
85*31087c5eSjeanPerier       if (next == allocaBlock)
86*31087c5eSjeanPerier         return true;
87*31087c5eSjeanPerier       if (auto pair = seen.insert(next); pair.second)
88*31087c5eSjeanPerier         successors.push_back(next);
89*31087c5eSjeanPerier     }
90*31087c5eSjeanPerier   // The traversal did not reach the alloca block again.
91*31087c5eSjeanPerier   return false;
92*31087c5eSjeanPerier }
93*31087c5eSjeanPerier bool BlockCycleDetector::allocaIsInCycle(
94*31087c5eSjeanPerier     fir::AllocaOp alloca,
95*31087c5eSjeanPerier     llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
96*31087c5eSjeanPerier   mlir::Block *allocaBlock = alloca->getBlock();
97*31087c5eSjeanPerier   auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
98*31087c5eSjeanPerier   bool alreadyAnalyzed = !analyzedPair.second;
99*31087c5eSjeanPerier   bool &isInCycle = analyzedPair.first->second;
100*31087c5eSjeanPerier   // Fast exit if block was already analyzed and no cycle was found.
101*31087c5eSjeanPerier   if (alreadyAnalyzed && !isInCycle)
102*31087c5eSjeanPerier     return false;
103*31087c5eSjeanPerier   // If the analysis was not done generically for this block, run it and
104*31087c5eSjeanPerier   // save the result.
105*31087c5eSjeanPerier   if (!alreadyAnalyzed)
106*31087c5eSjeanPerier     isInCycle = allocaIsInCycleImpl(allocaBlock, /*deallocationPoints*/ {});
107*31087c5eSjeanPerier   if (!isInCycle)
108*31087c5eSjeanPerier     return false;
109*31087c5eSjeanPerier   // If the generic analysis found a block loop, see if the deallocation
110*31087c5eSjeanPerier   // point would be reached before reaching the block again. Do not
111*31087c5eSjeanPerier   // cache that analysis that is specific to the deallocation points
112*31087c5eSjeanPerier   // found for this alloca.
113*31087c5eSjeanPerier   return allocaIsInCycleImpl(allocaBlock, deallocationPoints);
114*31087c5eSjeanPerier }
115*31087c5eSjeanPerier 
116*31087c5eSjeanPerier static bool terminatorYieldsMemory(mlir::Operation &terminator) {
117*31087c5eSjeanPerier   return llvm::any_of(terminator.getResults(), [](mlir::OpResult res) {
118*31087c5eSjeanPerier     return fir::conformsWithPassByRef(res.getType());
119*31087c5eSjeanPerier   });
120*31087c5eSjeanPerier }
121*31087c5eSjeanPerier 
122*31087c5eSjeanPerier static bool isRegionTerminator(mlir::Operation &terminator) {
123*31087c5eSjeanPerier   // Using ReturnLike trait is tempting but it is not set on
124*31087c5eSjeanPerier   // all region terminator that matters (like omp::TerminatorOp that
125*31087c5eSjeanPerier   // has no results).
126*31087c5eSjeanPerier   // May be true for dead code. It is not a correctness issue and dead code can
127*31087c5eSjeanPerier   // be eliminated by running region simplification before this utility is
128*31087c5eSjeanPerier   // used.
129*31087c5eSjeanPerier   // May also be true for unreachable like terminators (e.g., after an abort
130*31087c5eSjeanPerier   // call related to Fortran STOP). This is also OK, the inserted deallocation
131*31087c5eSjeanPerier   // will simply never be reached. It is easier for the rest of the code here
132*31087c5eSjeanPerier   // to assume there is always at least one deallocation point, so keep
133*31087c5eSjeanPerier   // unreachable terminators.
134*31087c5eSjeanPerier   return !terminator.hasSuccessors();
135*31087c5eSjeanPerier }
136*31087c5eSjeanPerier 
137*31087c5eSjeanPerier mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
138*31087c5eSjeanPerier     fir::AllocaOp alloca,
139*31087c5eSjeanPerier     llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
140*31087c5eSjeanPerier   // Step 1: Identify the operation and region owning the alloca.
141*31087c5eSjeanPerier   mlir::Region *owningRegion = alloca.getOwnerRegion();
142*31087c5eSjeanPerier   if (!owningRegion)
143*31087c5eSjeanPerier     return nullptr;
144*31087c5eSjeanPerier   mlir::Operation *owningOp = owningRegion->getParentOp();
145*31087c5eSjeanPerier   assert(owningOp && "region expected to be owned");
146*31087c5eSjeanPerier   // Step 2: Identify the exit points of the owning region, they are the default
147*31087c5eSjeanPerier   // deallocation points. TODO: detect and use lifetime markers to get earlier
148*31087c5eSjeanPerier   // deallocation points.
149*31087c5eSjeanPerier   bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
150*31087c5eSjeanPerier   for (mlir::Block &block : owningRegion->getBlocks())
151*31087c5eSjeanPerier     if (mlir::Operation *terminator = block.getTerminator();
152*31087c5eSjeanPerier         isRegionTerminator(*terminator)) {
153*31087c5eSjeanPerier       // FIXME: OpenACC and OpenMP privatization recipe are stand alone
154*31087c5eSjeanPerier       // operation meant to be later "inlined", the value they return may
155*31087c5eSjeanPerier       // be the address of a local alloca. It would be incorrect to insert
156*31087c5eSjeanPerier       // deallocation before the terminator (this would introduce use after
157*31087c5eSjeanPerier       // free once the recipe is inlined.
158*31087c5eSjeanPerier       // This probably require redesign or special handling on the OpenACC/MP
159*31087c5eSjeanPerier       // side.
160*31087c5eSjeanPerier       if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
161*31087c5eSjeanPerier         return nullptr;
162*31087c5eSjeanPerier       deallocationPoints.push_back(terminator);
163*31087c5eSjeanPerier     }
164*31087c5eSjeanPerier   // If no block terminators without successors have been found, this is
165*31087c5eSjeanPerier   // an odd region we cannot reason about (never seen yet in FIR and
166*31087c5eSjeanPerier   // mainstream dialects, but MLIR does not really prevent it).
167*31087c5eSjeanPerier   if (deallocationPoints.empty())
168*31087c5eSjeanPerier     return nullptr;
169*31087c5eSjeanPerier 
170*31087c5eSjeanPerier   // Step 3: detect block based loops between the allocation and deallocation
171*31087c5eSjeanPerier   // points, and add a deallocation point on the back edge to avoid memory
172*31087c5eSjeanPerier   // leaks.
173*31087c5eSjeanPerier   // The detection avoids doing region CFG analysis by assuming that there may
174*31087c5eSjeanPerier   // be cycles if deallocation points are not dominated by the alloca.
175*31087c5eSjeanPerier   // This leaves the cases where the deallocation points are in the same region
176*31087c5eSjeanPerier   // as the alloca (or nested inside it). In which cases there may be a back
177*31087c5eSjeanPerier   // edge between the alloca and the deallocation point via block successors. An
178*31087c5eSjeanPerier   // analysis is run to detect those cases.
179*31087c5eSjeanPerier   // When a loop is detected, the easiest solution to deallocate on the back
180*31087c5eSjeanPerier   // edge is to store the allocated memory address in a variable (that dominates
181*31087c5eSjeanPerier   // the loops) and to deallocate the address in that variable if it is set
182*31087c5eSjeanPerier   // before executing the allocation. This strategy still leads to correct
183*31087c5eSjeanPerier   // execution in the "false positive" cases.
184*31087c5eSjeanPerier   // Hence, the alloca is added as a deallocation point when there is no
185*31087c5eSjeanPerier   // dominance. Note that bringing lifetime markers above will reduce the
186*31087c5eSjeanPerier   // false positives.
187*31087c5eSjeanPerier   if (!allocDominatesDealloc(alloca, deallocationPoints) ||
188*31087c5eSjeanPerier       blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
189*31087c5eSjeanPerier     deallocationPoints.push_back(alloca.getOperation());
190*31087c5eSjeanPerier   return owningRegion;
191*31087c5eSjeanPerier }
192*31087c5eSjeanPerier 
193*31087c5eSjeanPerier void AllocaReplaceImpl::genIndirectDeallocation(
194*31087c5eSjeanPerier     mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
195*31087c5eSjeanPerier     llvm::ArrayRef<mlir::Operation *> deallocationPoints,
196*31087c5eSjeanPerier     mlir::Value replacement, mlir::Region &owningRegion) {
197*31087c5eSjeanPerier   mlir::Location loc = alloca.getLoc();
198*31087c5eSjeanPerier   auto replacementInsertPoint = rewriter.saveInsertionPoint();
199*31087c5eSjeanPerier   // Create C pointer variable in the entry block to store the alloc
200*31087c5eSjeanPerier   // and access it indirectly in the entry points that do not dominate.
201*31087c5eSjeanPerier   rewriter.setInsertionPointToStart(&owningRegion.front());
202*31087c5eSjeanPerier   mlir::Type heapType = fir::HeapType::get(alloca.getInType());
203*31087c5eSjeanPerier   mlir::Value ptrVar = rewriter.create<fir::AllocaOp>(loc, heapType);
204*31087c5eSjeanPerier   mlir::Value nullPtr = rewriter.create<fir::ZeroOp>(loc, heapType);
205*31087c5eSjeanPerier   rewriter.create<fir::StoreOp>(loc, nullPtr, ptrVar);
206*31087c5eSjeanPerier   // TODO: introducing a pointer compare op in FIR would help
207*31087c5eSjeanPerier   // generating less IR here.
208*31087c5eSjeanPerier   mlir::Type intPtrTy = fir::getIntPtrType(rewriter);
209*31087c5eSjeanPerier   mlir::Value c0 = rewriter.create<mlir::arith::ConstantOp>(
210*31087c5eSjeanPerier       loc, intPtrTy, rewriter.getIntegerAttr(intPtrTy, 0));
211*31087c5eSjeanPerier 
212*31087c5eSjeanPerier   // Store new storage address right after its creation.
213*31087c5eSjeanPerier   rewriter.restoreInsertionPoint(replacementInsertPoint);
214*31087c5eSjeanPerier   mlir::Value castReplacement =
215*31087c5eSjeanPerier       fir::factory::createConvert(rewriter, loc, heapType, replacement);
216*31087c5eSjeanPerier   rewriter.create<fir::StoreOp>(loc, castReplacement, ptrVar);
217*31087c5eSjeanPerier 
218*31087c5eSjeanPerier   // Generate conditional deallocation at every deallocation point.
219*31087c5eSjeanPerier   auto genConditionalDealloc = [&](mlir::Location loc) {
220*31087c5eSjeanPerier     mlir::Value ptrVal = rewriter.create<fir::LoadOp>(loc, ptrVar);
221*31087c5eSjeanPerier     mlir::Value ptrToInt =
222*31087c5eSjeanPerier         rewriter.create<fir::ConvertOp>(loc, intPtrTy, ptrVal);
223*31087c5eSjeanPerier     mlir::Value isAllocated = rewriter.create<mlir::arith::CmpIOp>(
224*31087c5eSjeanPerier         loc, mlir::arith::CmpIPredicate::ne, ptrToInt, c0);
225*31087c5eSjeanPerier     auto ifOp = rewriter.create<fir::IfOp>(loc, std::nullopt, isAllocated,
226*31087c5eSjeanPerier                                            /*withElseRegion=*/false);
227*31087c5eSjeanPerier     rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
228*31087c5eSjeanPerier     mlir::Value cast = fir::factory::createConvert(
229*31087c5eSjeanPerier         rewriter, loc, replacement.getType(), ptrVal);
230*31087c5eSjeanPerier     deallocGenerator(loc, rewriter, cast);
231*31087c5eSjeanPerier     // Currently there is no need to reset the pointer var because two
232*31087c5eSjeanPerier     // deallocation points can never be reached without going through the
233*31087c5eSjeanPerier     // alloca.
234*31087c5eSjeanPerier     rewriter.setInsertionPointAfter(ifOp);
235*31087c5eSjeanPerier   };
236*31087c5eSjeanPerier   for (mlir::Operation *deallocPoint : deallocationPoints) {
237*31087c5eSjeanPerier     rewriter.setInsertionPoint(deallocPoint);
238*31087c5eSjeanPerier     genConditionalDealloc(deallocPoint->getLoc());
239*31087c5eSjeanPerier   }
240*31087c5eSjeanPerier }
241*31087c5eSjeanPerier 
242*31087c5eSjeanPerier bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
243*31087c5eSjeanPerier                                 fir::AllocaOp alloca) {
244*31087c5eSjeanPerier   llvm::SmallVector<mlir::Operation *> deallocationPoints;
245*31087c5eSjeanPerier   mlir::Region *owningRegion =
246*31087c5eSjeanPerier       findDeallocationPointsAndOwner(alloca, deallocationPoints);
247*31087c5eSjeanPerier   if (!owningRegion)
248*31087c5eSjeanPerier     return false;
249*31087c5eSjeanPerier   rewriter.setInsertionPointAfter(alloca.getOperation());
250*31087c5eSjeanPerier   bool deallocPointsDominateAlloc =
251*31087c5eSjeanPerier       allocDominatesDealloc(alloca, deallocationPoints);
252*31087c5eSjeanPerier   if (mlir::Value replacement =
253*31087c5eSjeanPerier           allocaRewriter(rewriter, alloca, deallocPointsDominateAlloc)) {
254*31087c5eSjeanPerier     mlir::Value castReplacement = fir::factory::createConvert(
255*31087c5eSjeanPerier         rewriter, alloca.getLoc(), alloca.getType(), replacement);
256*31087c5eSjeanPerier     if (deallocPointsDominateAlloc)
257*31087c5eSjeanPerier       for (mlir::Operation *deallocPoint : deallocationPoints) {
258*31087c5eSjeanPerier         rewriter.setInsertionPoint(deallocPoint);
259*31087c5eSjeanPerier         deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
260*31087c5eSjeanPerier       }
261*31087c5eSjeanPerier     else
262*31087c5eSjeanPerier       genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
263*31087c5eSjeanPerier                               *owningRegion);
264*31087c5eSjeanPerier     rewriter.replaceOp(alloca, castReplacement);
265*31087c5eSjeanPerier   }
266*31087c5eSjeanPerier   return true;
267*31087c5eSjeanPerier }
268*31087c5eSjeanPerier 
269*31087c5eSjeanPerier bool fir::replaceAllocas(mlir::RewriterBase &rewriter,
270*31087c5eSjeanPerier                          mlir::Operation *parentOp,
271*31087c5eSjeanPerier                          MustRewriteCallBack mustReplace,
272*31087c5eSjeanPerier                          AllocaRewriterCallBack allocaRewriter,
273*31087c5eSjeanPerier                          DeallocCallBack deallocGenerator) {
274*31087c5eSjeanPerier   // If the parent operation is not an alloca owner, the code below would risk
275*31087c5eSjeanPerier   // modifying IR outside of parentOp.
276*31087c5eSjeanPerier   if (!fir::AllocaOp::ownsNestedAlloca(parentOp))
277*31087c5eSjeanPerier     return false;
278*31087c5eSjeanPerier   auto insertPoint = rewriter.saveInsertionPoint();
279*31087c5eSjeanPerier   bool replacedAllRequestedAlloca = true;
280*31087c5eSjeanPerier   AllocaReplaceImpl impl(allocaRewriter, deallocGenerator);
281*31087c5eSjeanPerier   parentOp->walk([&](fir::AllocaOp alloca) {
282*31087c5eSjeanPerier     if (mustReplace(alloca))
283*31087c5eSjeanPerier       replacedAllRequestedAlloca &= impl.replace(rewriter, alloca);
284*31087c5eSjeanPerier   });
285*31087c5eSjeanPerier   rewriter.restoreInsertionPoint(insertPoint);
286*31087c5eSjeanPerier   return replacedAllRequestedAlloca;
287*31087c5eSjeanPerier }
288