xref: /openbsd-src/gnu/llvm/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp (revision 09467b48e8bc8b4905716062da846024139afbf2)
1*09467b48Spatrick //===----------------- LoopRotationUtils.cpp -----------------------------===//
2*09467b48Spatrick //
3*09467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*09467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
5*09467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*09467b48Spatrick //
7*09467b48Spatrick //===----------------------------------------------------------------------===//
8*09467b48Spatrick //
9*09467b48Spatrick // This file provides utilities to convert a loop into a loop with bottom test.
10*09467b48Spatrick //
11*09467b48Spatrick //===----------------------------------------------------------------------===//
12*09467b48Spatrick 
13*09467b48Spatrick #include "llvm/Transforms/Utils/LoopRotationUtils.h"
14*09467b48Spatrick #include "llvm/ADT/Statistic.h"
15*09467b48Spatrick #include "llvm/Analysis/AliasAnalysis.h"
16*09467b48Spatrick #include "llvm/Analysis/AssumptionCache.h"
17*09467b48Spatrick #include "llvm/Analysis/BasicAliasAnalysis.h"
18*09467b48Spatrick #include "llvm/Analysis/CodeMetrics.h"
19*09467b48Spatrick #include "llvm/Analysis/DomTreeUpdater.h"
20*09467b48Spatrick #include "llvm/Analysis/GlobalsModRef.h"
21*09467b48Spatrick #include "llvm/Analysis/InstructionSimplify.h"
22*09467b48Spatrick #include "llvm/Analysis/LoopPass.h"
23*09467b48Spatrick #include "llvm/Analysis/MemorySSA.h"
24*09467b48Spatrick #include "llvm/Analysis/MemorySSAUpdater.h"
25*09467b48Spatrick #include "llvm/Analysis/ScalarEvolution.h"
26*09467b48Spatrick #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
27*09467b48Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
28*09467b48Spatrick #include "llvm/Analysis/ValueTracking.h"
29*09467b48Spatrick #include "llvm/IR/CFG.h"
30*09467b48Spatrick #include "llvm/IR/DebugInfoMetadata.h"
31*09467b48Spatrick #include "llvm/IR/Dominators.h"
32*09467b48Spatrick #include "llvm/IR/Function.h"
33*09467b48Spatrick #include "llvm/IR/IntrinsicInst.h"
34*09467b48Spatrick #include "llvm/IR/Module.h"
35*09467b48Spatrick #include "llvm/Support/CommandLine.h"
36*09467b48Spatrick #include "llvm/Support/Debug.h"
37*09467b48Spatrick #include "llvm/Support/raw_ostream.h"
38*09467b48Spatrick #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39*09467b48Spatrick #include "llvm/Transforms/Utils/Local.h"
40*09467b48Spatrick #include "llvm/Transforms/Utils/LoopUtils.h"
41*09467b48Spatrick #include "llvm/Transforms/Utils/SSAUpdater.h"
42*09467b48Spatrick #include "llvm/Transforms/Utils/ValueMapper.h"
43*09467b48Spatrick using namespace llvm;
44*09467b48Spatrick 
45*09467b48Spatrick #define DEBUG_TYPE "loop-rotate"
46*09467b48Spatrick 
47*09467b48Spatrick STATISTIC(NumRotated, "Number of loops rotated");
48*09467b48Spatrick 
49*09467b48Spatrick namespace {
50*09467b48Spatrick /// A simple loop rotation transformation.
51*09467b48Spatrick class LoopRotate {
52*09467b48Spatrick   const unsigned MaxHeaderSize;
53*09467b48Spatrick   LoopInfo *LI;
54*09467b48Spatrick   const TargetTransformInfo *TTI;
55*09467b48Spatrick   AssumptionCache *AC;
56*09467b48Spatrick   DominatorTree *DT;
57*09467b48Spatrick   ScalarEvolution *SE;
58*09467b48Spatrick   MemorySSAUpdater *MSSAU;
59*09467b48Spatrick   const SimplifyQuery &SQ;
60*09467b48Spatrick   bool RotationOnly;
61*09467b48Spatrick   bool IsUtilMode;
62*09467b48Spatrick 
63*09467b48Spatrick public:
64*09467b48Spatrick   LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
65*09467b48Spatrick              const TargetTransformInfo *TTI, AssumptionCache *AC,
66*09467b48Spatrick              DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
67*09467b48Spatrick              const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
68*09467b48Spatrick       : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
69*09467b48Spatrick         MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
70*09467b48Spatrick         IsUtilMode(IsUtilMode) {}
71*09467b48Spatrick   bool processLoop(Loop *L);
72*09467b48Spatrick 
73*09467b48Spatrick private:
74*09467b48Spatrick   bool rotateLoop(Loop *L, bool SimplifiedLatch);
75*09467b48Spatrick   bool simplifyLoopLatch(Loop *L);
76*09467b48Spatrick };
77*09467b48Spatrick } // end anonymous namespace
78*09467b48Spatrick 
79*09467b48Spatrick /// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
80*09467b48Spatrick /// previously exist in the map, and the value was inserted.
81*09467b48Spatrick static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
82*09467b48Spatrick   bool Inserted = VM.insert({K, V}).second;
83*09467b48Spatrick   assert(Inserted);
84*09467b48Spatrick   (void)Inserted;
85*09467b48Spatrick }
86*09467b48Spatrick /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
87*09467b48Spatrick /// old header into the preheader.  If there were uses of the values produced by
88*09467b48Spatrick /// these instruction that were outside of the loop, we have to insert PHI nodes
89*09467b48Spatrick /// to merge the two values.  Do this now.
90*09467b48Spatrick static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
91*09467b48Spatrick                                             BasicBlock *OrigPreheader,
92*09467b48Spatrick                                             ValueToValueMapTy &ValueMap,
93*09467b48Spatrick                                 SmallVectorImpl<PHINode*> *InsertedPHIs) {
94*09467b48Spatrick   // Remove PHI node entries that are no longer live.
95*09467b48Spatrick   BasicBlock::iterator I, E = OrigHeader->end();
96*09467b48Spatrick   for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
97*09467b48Spatrick     PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
98*09467b48Spatrick 
99*09467b48Spatrick   // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
100*09467b48Spatrick   // as necessary.
101*09467b48Spatrick   SSAUpdater SSA(InsertedPHIs);
102*09467b48Spatrick   for (I = OrigHeader->begin(); I != E; ++I) {
103*09467b48Spatrick     Value *OrigHeaderVal = &*I;
104*09467b48Spatrick 
105*09467b48Spatrick     // If there are no uses of the value (e.g. because it returns void), there
106*09467b48Spatrick     // is nothing to rewrite.
107*09467b48Spatrick     if (OrigHeaderVal->use_empty())
108*09467b48Spatrick       continue;
109*09467b48Spatrick 
110*09467b48Spatrick     Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
111*09467b48Spatrick 
112*09467b48Spatrick     // The value now exits in two versions: the initial value in the preheader
113*09467b48Spatrick     // and the loop "next" value in the original header.
114*09467b48Spatrick     SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
115*09467b48Spatrick     SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
116*09467b48Spatrick     SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
117*09467b48Spatrick 
118*09467b48Spatrick     // Visit each use of the OrigHeader instruction.
119*09467b48Spatrick     for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
120*09467b48Spatrick                              UE = OrigHeaderVal->use_end();
121*09467b48Spatrick          UI != UE;) {
122*09467b48Spatrick       // Grab the use before incrementing the iterator.
123*09467b48Spatrick       Use &U = *UI;
124*09467b48Spatrick 
125*09467b48Spatrick       // Increment the iterator before removing the use from the list.
126*09467b48Spatrick       ++UI;
127*09467b48Spatrick 
128*09467b48Spatrick       // SSAUpdater can't handle a non-PHI use in the same block as an
129*09467b48Spatrick       // earlier def. We can easily handle those cases manually.
130*09467b48Spatrick       Instruction *UserInst = cast<Instruction>(U.getUser());
131*09467b48Spatrick       if (!isa<PHINode>(UserInst)) {
132*09467b48Spatrick         BasicBlock *UserBB = UserInst->getParent();
133*09467b48Spatrick 
134*09467b48Spatrick         // The original users in the OrigHeader are already using the
135*09467b48Spatrick         // original definitions.
136*09467b48Spatrick         if (UserBB == OrigHeader)
137*09467b48Spatrick           continue;
138*09467b48Spatrick 
139*09467b48Spatrick         // Users in the OrigPreHeader need to use the value to which the
140*09467b48Spatrick         // original definitions are mapped.
141*09467b48Spatrick         if (UserBB == OrigPreheader) {
142*09467b48Spatrick           U = OrigPreHeaderVal;
143*09467b48Spatrick           continue;
144*09467b48Spatrick         }
145*09467b48Spatrick       }
146*09467b48Spatrick 
147*09467b48Spatrick       // Anything else can be handled by SSAUpdater.
148*09467b48Spatrick       SSA.RewriteUse(U);
149*09467b48Spatrick     }
150*09467b48Spatrick 
151*09467b48Spatrick     // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
152*09467b48Spatrick     // intrinsics.
153*09467b48Spatrick     SmallVector<DbgValueInst *, 1> DbgValues;
154*09467b48Spatrick     llvm::findDbgValues(DbgValues, OrigHeaderVal);
155*09467b48Spatrick     for (auto &DbgValue : DbgValues) {
156*09467b48Spatrick       // The original users in the OrigHeader are already using the original
157*09467b48Spatrick       // definitions.
158*09467b48Spatrick       BasicBlock *UserBB = DbgValue->getParent();
159*09467b48Spatrick       if (UserBB == OrigHeader)
160*09467b48Spatrick         continue;
161*09467b48Spatrick 
162*09467b48Spatrick       // Users in the OrigPreHeader need to use the value to which the
163*09467b48Spatrick       // original definitions are mapped and anything else can be handled by
164*09467b48Spatrick       // the SSAUpdater. To avoid adding PHINodes, check if the value is
165*09467b48Spatrick       // available in UserBB, if not substitute undef.
166*09467b48Spatrick       Value *NewVal;
167*09467b48Spatrick       if (UserBB == OrigPreheader)
168*09467b48Spatrick         NewVal = OrigPreHeaderVal;
169*09467b48Spatrick       else if (SSA.HasValueForBlock(UserBB))
170*09467b48Spatrick         NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
171*09467b48Spatrick       else
172*09467b48Spatrick         NewVal = UndefValue::get(OrigHeaderVal->getType());
173*09467b48Spatrick       DbgValue->setOperand(0,
174*09467b48Spatrick                            MetadataAsValue::get(OrigHeaderVal->getContext(),
175*09467b48Spatrick                                                 ValueAsMetadata::get(NewVal)));
176*09467b48Spatrick     }
177*09467b48Spatrick   }
178*09467b48Spatrick }
179*09467b48Spatrick 
180*09467b48Spatrick // Look for a phi which is only used outside the loop (via a LCSSA phi)
181*09467b48Spatrick // in the exit from the header. This means that rotating the loop can
182*09467b48Spatrick // remove the phi.
183*09467b48Spatrick static bool shouldRotateLoopExitingLatch(Loop *L) {
184*09467b48Spatrick   BasicBlock *Header = L->getHeader();
185*09467b48Spatrick   BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
186*09467b48Spatrick   if (L->contains(HeaderExit))
187*09467b48Spatrick     HeaderExit = Header->getTerminator()->getSuccessor(1);
188*09467b48Spatrick 
189*09467b48Spatrick   for (auto &Phi : Header->phis()) {
190*09467b48Spatrick     // Look for uses of this phi in the loop/via exits other than the header.
191*09467b48Spatrick     if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
192*09467b48Spatrick           return cast<Instruction>(U)->getParent() != HeaderExit;
193*09467b48Spatrick         }))
194*09467b48Spatrick       continue;
195*09467b48Spatrick     return true;
196*09467b48Spatrick   }
197*09467b48Spatrick 
198*09467b48Spatrick   return false;
199*09467b48Spatrick }
200*09467b48Spatrick 
201*09467b48Spatrick /// Rotate loop LP. Return true if the loop is rotated.
202*09467b48Spatrick ///
203*09467b48Spatrick /// \param SimplifiedLatch is true if the latch was just folded into the final
204*09467b48Spatrick /// loop exit. In this case we may want to rotate even though the new latch is
205*09467b48Spatrick /// now an exiting branch. This rotation would have happened had the latch not
206*09467b48Spatrick /// been simplified. However, if SimplifiedLatch is false, then we avoid
207*09467b48Spatrick /// rotating loops in which the latch exits to avoid excessive or endless
208*09467b48Spatrick /// rotation. LoopRotate should be repeatable and converge to a canonical
209*09467b48Spatrick /// form. This property is satisfied because simplifying the loop latch can only
210*09467b48Spatrick /// happen once across multiple invocations of the LoopRotate pass.
211*09467b48Spatrick bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
212*09467b48Spatrick   // If the loop has only one block then there is not much to rotate.
213*09467b48Spatrick   if (L->getBlocks().size() == 1)
214*09467b48Spatrick     return false;
215*09467b48Spatrick 
216*09467b48Spatrick   BasicBlock *OrigHeader = L->getHeader();
217*09467b48Spatrick   BasicBlock *OrigLatch = L->getLoopLatch();
218*09467b48Spatrick 
219*09467b48Spatrick   BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
220*09467b48Spatrick   if (!BI || BI->isUnconditional())
221*09467b48Spatrick     return false;
222*09467b48Spatrick 
223*09467b48Spatrick   // If the loop header is not one of the loop exiting blocks then
224*09467b48Spatrick   // either this loop is already rotated or it is not
225*09467b48Spatrick   // suitable for loop rotation transformations.
226*09467b48Spatrick   if (!L->isLoopExiting(OrigHeader))
227*09467b48Spatrick     return false;
228*09467b48Spatrick 
229*09467b48Spatrick   // If the loop latch already contains a branch that leaves the loop then the
230*09467b48Spatrick   // loop is already rotated.
231*09467b48Spatrick   if (!OrigLatch)
232*09467b48Spatrick     return false;
233*09467b48Spatrick 
234*09467b48Spatrick   // Rotate if either the loop latch does *not* exit the loop, or if the loop
235*09467b48Spatrick   // latch was just simplified. Or if we think it will be profitable.
236*09467b48Spatrick   if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
237*09467b48Spatrick       !shouldRotateLoopExitingLatch(L))
238*09467b48Spatrick     return false;
239*09467b48Spatrick 
240*09467b48Spatrick   // Check size of original header and reject loop if it is very big or we can't
241*09467b48Spatrick   // duplicate blocks inside it.
242*09467b48Spatrick   {
243*09467b48Spatrick     SmallPtrSet<const Value *, 32> EphValues;
244*09467b48Spatrick     CodeMetrics::collectEphemeralValues(L, AC, EphValues);
245*09467b48Spatrick 
246*09467b48Spatrick     CodeMetrics Metrics;
247*09467b48Spatrick     Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
248*09467b48Spatrick     if (Metrics.notDuplicatable) {
249*09467b48Spatrick       LLVM_DEBUG(
250*09467b48Spatrick           dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
251*09467b48Spatrick                  << " instructions: ";
252*09467b48Spatrick           L->dump());
253*09467b48Spatrick       return false;
254*09467b48Spatrick     }
255*09467b48Spatrick     if (Metrics.convergent) {
256*09467b48Spatrick       LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
257*09467b48Spatrick                            "instructions: ";
258*09467b48Spatrick                  L->dump());
259*09467b48Spatrick       return false;
260*09467b48Spatrick     }
261*09467b48Spatrick     if (Metrics.NumInsts > MaxHeaderSize)
262*09467b48Spatrick       return false;
263*09467b48Spatrick   }
264*09467b48Spatrick 
265*09467b48Spatrick   // Now, this loop is suitable for rotation.
266*09467b48Spatrick   BasicBlock *OrigPreheader = L->getLoopPreheader();
267*09467b48Spatrick 
268*09467b48Spatrick   // If the loop could not be converted to canonical form, it must have an
269*09467b48Spatrick   // indirectbr in it, just give up.
270*09467b48Spatrick   if (!OrigPreheader || !L->hasDedicatedExits())
271*09467b48Spatrick     return false;
272*09467b48Spatrick 
273*09467b48Spatrick   // Anything ScalarEvolution may know about this loop or the PHI nodes
274*09467b48Spatrick   // in its header will soon be invalidated. We should also invalidate
275*09467b48Spatrick   // all outer loops because insertion and deletion of blocks that happens
276*09467b48Spatrick   // during the rotation may violate invariants related to backedge taken
277*09467b48Spatrick   // infos in them.
278*09467b48Spatrick   if (SE)
279*09467b48Spatrick     SE->forgetTopmostLoop(L);
280*09467b48Spatrick 
281*09467b48Spatrick   LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
282*09467b48Spatrick   if (MSSAU && VerifyMemorySSA)
283*09467b48Spatrick     MSSAU->getMemorySSA()->verifyMemorySSA();
284*09467b48Spatrick 
285*09467b48Spatrick   // Find new Loop header. NewHeader is a Header's one and only successor
286*09467b48Spatrick   // that is inside loop.  Header's other successor is outside the
287*09467b48Spatrick   // loop.  Otherwise loop is not suitable for rotation.
288*09467b48Spatrick   BasicBlock *Exit = BI->getSuccessor(0);
289*09467b48Spatrick   BasicBlock *NewHeader = BI->getSuccessor(1);
290*09467b48Spatrick   if (L->contains(Exit))
291*09467b48Spatrick     std::swap(Exit, NewHeader);
292*09467b48Spatrick   assert(NewHeader && "Unable to determine new loop header");
293*09467b48Spatrick   assert(L->contains(NewHeader) && !L->contains(Exit) &&
294*09467b48Spatrick          "Unable to determine loop header and exit blocks");
295*09467b48Spatrick 
296*09467b48Spatrick   // This code assumes that the new header has exactly one predecessor.
297*09467b48Spatrick   // Remove any single-entry PHI nodes in it.
298*09467b48Spatrick   assert(NewHeader->getSinglePredecessor() &&
299*09467b48Spatrick          "New header doesn't have one pred!");
300*09467b48Spatrick   FoldSingleEntryPHINodes(NewHeader);
301*09467b48Spatrick 
302*09467b48Spatrick   // Begin by walking OrigHeader and populating ValueMap with an entry for
303*09467b48Spatrick   // each Instruction.
304*09467b48Spatrick   BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
305*09467b48Spatrick   ValueToValueMapTy ValueMap, ValueMapMSSA;
306*09467b48Spatrick 
307*09467b48Spatrick   // For PHI nodes, the value available in OldPreHeader is just the
308*09467b48Spatrick   // incoming value from OldPreHeader.
309*09467b48Spatrick   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
310*09467b48Spatrick     InsertNewValueIntoMap(ValueMap, PN,
311*09467b48Spatrick                           PN->getIncomingValueForBlock(OrigPreheader));
312*09467b48Spatrick 
313*09467b48Spatrick   // For the rest of the instructions, either hoist to the OrigPreheader if
314*09467b48Spatrick   // possible or create a clone in the OldPreHeader if not.
315*09467b48Spatrick   Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
316*09467b48Spatrick 
317*09467b48Spatrick   // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
318*09467b48Spatrick   using DbgIntrinsicHash =
319*09467b48Spatrick       std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
320*09467b48Spatrick   auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
321*09467b48Spatrick     return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
322*09467b48Spatrick   };
323*09467b48Spatrick   SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
324*09467b48Spatrick   for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
325*09467b48Spatrick        I != E; ++I) {
326*09467b48Spatrick     if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
327*09467b48Spatrick       DbgIntrinsics.insert(makeHash(DII));
328*09467b48Spatrick     else
329*09467b48Spatrick       break;
330*09467b48Spatrick   }
331*09467b48Spatrick 
332*09467b48Spatrick   while (I != E) {
333*09467b48Spatrick     Instruction *Inst = &*I++;
334*09467b48Spatrick 
335*09467b48Spatrick     // If the instruction's operands are invariant and it doesn't read or write
336*09467b48Spatrick     // memory, then it is safe to hoist.  Doing this doesn't change the order of
337*09467b48Spatrick     // execution in the preheader, but does prevent the instruction from
338*09467b48Spatrick     // executing in each iteration of the loop.  This means it is safe to hoist
339*09467b48Spatrick     // something that might trap, but isn't safe to hoist something that reads
340*09467b48Spatrick     // memory (without proving that the loop doesn't write).
341*09467b48Spatrick     if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
342*09467b48Spatrick         !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
343*09467b48Spatrick         !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
344*09467b48Spatrick       Inst->moveBefore(LoopEntryBranch);
345*09467b48Spatrick       continue;
346*09467b48Spatrick     }
347*09467b48Spatrick 
348*09467b48Spatrick     // Otherwise, create a duplicate of the instruction.
349*09467b48Spatrick     Instruction *C = Inst->clone();
350*09467b48Spatrick 
351*09467b48Spatrick     // Eagerly remap the operands of the instruction.
352*09467b48Spatrick     RemapInstruction(C, ValueMap,
353*09467b48Spatrick                      RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
354*09467b48Spatrick 
355*09467b48Spatrick     // Avoid inserting the same intrinsic twice.
356*09467b48Spatrick     if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
357*09467b48Spatrick       if (DbgIntrinsics.count(makeHash(DII))) {
358*09467b48Spatrick         C->deleteValue();
359*09467b48Spatrick         continue;
360*09467b48Spatrick       }
361*09467b48Spatrick 
362*09467b48Spatrick     // With the operands remapped, see if the instruction constant folds or is
363*09467b48Spatrick     // otherwise simplifyable.  This commonly occurs because the entry from PHI
364*09467b48Spatrick     // nodes allows icmps and other instructions to fold.
365*09467b48Spatrick     Value *V = SimplifyInstruction(C, SQ);
366*09467b48Spatrick     if (V && LI->replacementPreservesLCSSAForm(C, V)) {
367*09467b48Spatrick       // If so, then delete the temporary instruction and stick the folded value
368*09467b48Spatrick       // in the map.
369*09467b48Spatrick       InsertNewValueIntoMap(ValueMap, Inst, V);
370*09467b48Spatrick       if (!C->mayHaveSideEffects()) {
371*09467b48Spatrick         C->deleteValue();
372*09467b48Spatrick         C = nullptr;
373*09467b48Spatrick       }
374*09467b48Spatrick     } else {
375*09467b48Spatrick       InsertNewValueIntoMap(ValueMap, Inst, C);
376*09467b48Spatrick     }
377*09467b48Spatrick     if (C) {
378*09467b48Spatrick       // Otherwise, stick the new instruction into the new block!
379*09467b48Spatrick       C->setName(Inst->getName());
380*09467b48Spatrick       C->insertBefore(LoopEntryBranch);
381*09467b48Spatrick 
382*09467b48Spatrick       if (auto *II = dyn_cast<IntrinsicInst>(C))
383*09467b48Spatrick         if (II->getIntrinsicID() == Intrinsic::assume)
384*09467b48Spatrick           AC->registerAssumption(II);
385*09467b48Spatrick       // MemorySSA cares whether the cloned instruction was inserted or not, and
386*09467b48Spatrick       // not whether it can be remapped to a simplified value.
387*09467b48Spatrick       if (MSSAU)
388*09467b48Spatrick         InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
389*09467b48Spatrick     }
390*09467b48Spatrick   }
391*09467b48Spatrick 
392*09467b48Spatrick   // Along with all the other instructions, we just cloned OrigHeader's
393*09467b48Spatrick   // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
394*09467b48Spatrick   // successors by duplicating their incoming values for OrigHeader.
395*09467b48Spatrick   for (BasicBlock *SuccBB : successors(OrigHeader))
396*09467b48Spatrick     for (BasicBlock::iterator BI = SuccBB->begin();
397*09467b48Spatrick          PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
398*09467b48Spatrick       PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
399*09467b48Spatrick 
400*09467b48Spatrick   // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
401*09467b48Spatrick   // OrigPreHeader's old terminator (the original branch into the loop), and
402*09467b48Spatrick   // remove the corresponding incoming values from the PHI nodes in OrigHeader.
403*09467b48Spatrick   LoopEntryBranch->eraseFromParent();
404*09467b48Spatrick 
405*09467b48Spatrick   // Update MemorySSA before the rewrite call below changes the 1:1
406*09467b48Spatrick   // instruction:cloned_instruction_or_value mapping.
407*09467b48Spatrick   if (MSSAU) {
408*09467b48Spatrick     InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
409*09467b48Spatrick     MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
410*09467b48Spatrick                                         ValueMapMSSA);
411*09467b48Spatrick   }
412*09467b48Spatrick 
413*09467b48Spatrick   SmallVector<PHINode*, 2> InsertedPHIs;
414*09467b48Spatrick   // If there were any uses of instructions in the duplicated block outside the
415*09467b48Spatrick   // loop, update them, inserting PHI nodes as required
416*09467b48Spatrick   RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
417*09467b48Spatrick                                   &InsertedPHIs);
418*09467b48Spatrick 
419*09467b48Spatrick   // Attach dbg.value intrinsics to the new phis if that phi uses a value that
420*09467b48Spatrick   // previously had debug metadata attached. This keeps the debug info
421*09467b48Spatrick   // up-to-date in the loop body.
422*09467b48Spatrick   if (!InsertedPHIs.empty())
423*09467b48Spatrick     insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
424*09467b48Spatrick 
425*09467b48Spatrick   // NewHeader is now the header of the loop.
426*09467b48Spatrick   L->moveToHeader(NewHeader);
427*09467b48Spatrick   assert(L->getHeader() == NewHeader && "Latch block is our new header");
428*09467b48Spatrick 
429*09467b48Spatrick   // Inform DT about changes to the CFG.
430*09467b48Spatrick   if (DT) {
431*09467b48Spatrick     // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
432*09467b48Spatrick     // the DT about the removed edge to the OrigHeader (that got removed).
433*09467b48Spatrick     SmallVector<DominatorTree::UpdateType, 3> Updates;
434*09467b48Spatrick     Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
435*09467b48Spatrick     Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
436*09467b48Spatrick     Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
437*09467b48Spatrick     DT->applyUpdates(Updates);
438*09467b48Spatrick 
439*09467b48Spatrick     if (MSSAU) {
440*09467b48Spatrick       MSSAU->applyUpdates(Updates, *DT);
441*09467b48Spatrick       if (VerifyMemorySSA)
442*09467b48Spatrick         MSSAU->getMemorySSA()->verifyMemorySSA();
443*09467b48Spatrick     }
444*09467b48Spatrick   }
445*09467b48Spatrick 
446*09467b48Spatrick   // At this point, we've finished our major CFG changes.  As part of cloning
447*09467b48Spatrick   // the loop into the preheader we've simplified instructions and the
448*09467b48Spatrick   // duplicated conditional branch may now be branching on a constant.  If it is
449*09467b48Spatrick   // branching on a constant and if that constant means that we enter the loop,
450*09467b48Spatrick   // then we fold away the cond branch to an uncond branch.  This simplifies the
451*09467b48Spatrick   // loop in cases important for nested loops, and it also means we don't have
452*09467b48Spatrick   // to split as many edges.
453*09467b48Spatrick   BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
454*09467b48Spatrick   assert(PHBI->isConditional() && "Should be clone of BI condbr!");
455*09467b48Spatrick   if (!isa<ConstantInt>(PHBI->getCondition()) ||
456*09467b48Spatrick       PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
457*09467b48Spatrick           NewHeader) {
458*09467b48Spatrick     // The conditional branch can't be folded, handle the general case.
459*09467b48Spatrick     // Split edges as necessary to preserve LoopSimplify form.
460*09467b48Spatrick 
461*09467b48Spatrick     // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
462*09467b48Spatrick     // thus is not a preheader anymore.
463*09467b48Spatrick     // Split the edge to form a real preheader.
464*09467b48Spatrick     BasicBlock *NewPH = SplitCriticalEdge(
465*09467b48Spatrick         OrigPreheader, NewHeader,
466*09467b48Spatrick         CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
467*09467b48Spatrick     NewPH->setName(NewHeader->getName() + ".lr.ph");
468*09467b48Spatrick 
469*09467b48Spatrick     // Preserve canonical loop form, which means that 'Exit' should have only
470*09467b48Spatrick     // one predecessor. Note that Exit could be an exit block for multiple
471*09467b48Spatrick     // nested loops, causing both of the edges to now be critical and need to
472*09467b48Spatrick     // be split.
473*09467b48Spatrick     SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
474*09467b48Spatrick     bool SplitLatchEdge = false;
475*09467b48Spatrick     for (BasicBlock *ExitPred : ExitPreds) {
476*09467b48Spatrick       // We only need to split loop exit edges.
477*09467b48Spatrick       Loop *PredLoop = LI->getLoopFor(ExitPred);
478*09467b48Spatrick       if (!PredLoop || PredLoop->contains(Exit) ||
479*09467b48Spatrick           ExitPred->getTerminator()->isIndirectTerminator())
480*09467b48Spatrick         continue;
481*09467b48Spatrick       SplitLatchEdge |= L->getLoopLatch() == ExitPred;
482*09467b48Spatrick       BasicBlock *ExitSplit = SplitCriticalEdge(
483*09467b48Spatrick           ExitPred, Exit,
484*09467b48Spatrick           CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
485*09467b48Spatrick       ExitSplit->moveBefore(Exit);
486*09467b48Spatrick     }
487*09467b48Spatrick     assert(SplitLatchEdge &&
488*09467b48Spatrick            "Despite splitting all preds, failed to split latch exit?");
489*09467b48Spatrick   } else {
490*09467b48Spatrick     // We can fold the conditional branch in the preheader, this makes things
491*09467b48Spatrick     // simpler. The first step is to remove the extra edge to the Exit block.
492*09467b48Spatrick     Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
493*09467b48Spatrick     BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
494*09467b48Spatrick     NewBI->setDebugLoc(PHBI->getDebugLoc());
495*09467b48Spatrick     PHBI->eraseFromParent();
496*09467b48Spatrick 
497*09467b48Spatrick     // With our CFG finalized, update DomTree if it is available.
498*09467b48Spatrick     if (DT) DT->deleteEdge(OrigPreheader, Exit);
499*09467b48Spatrick 
500*09467b48Spatrick     // Update MSSA too, if available.
501*09467b48Spatrick     if (MSSAU)
502*09467b48Spatrick       MSSAU->removeEdge(OrigPreheader, Exit);
503*09467b48Spatrick   }
504*09467b48Spatrick 
505*09467b48Spatrick   assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
506*09467b48Spatrick   assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
507*09467b48Spatrick 
508*09467b48Spatrick   if (MSSAU && VerifyMemorySSA)
509*09467b48Spatrick     MSSAU->getMemorySSA()->verifyMemorySSA();
510*09467b48Spatrick 
511*09467b48Spatrick   // Now that the CFG and DomTree are in a consistent state again, try to merge
512*09467b48Spatrick   // the OrigHeader block into OrigLatch.  This will succeed if they are
513*09467b48Spatrick   // connected by an unconditional branch.  This is just a cleanup so the
514*09467b48Spatrick   // emitted code isn't too gross in this common case.
515*09467b48Spatrick   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
516*09467b48Spatrick   MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
517*09467b48Spatrick 
518*09467b48Spatrick   if (MSSAU && VerifyMemorySSA)
519*09467b48Spatrick     MSSAU->getMemorySSA()->verifyMemorySSA();
520*09467b48Spatrick 
521*09467b48Spatrick   LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
522*09467b48Spatrick 
523*09467b48Spatrick   ++NumRotated;
524*09467b48Spatrick   return true;
525*09467b48Spatrick }
526*09467b48Spatrick 
527*09467b48Spatrick /// Determine whether the instructions in this range may be safely and cheaply
528*09467b48Spatrick /// speculated. This is not an important enough situation to develop complex
529*09467b48Spatrick /// heuristics. We handle a single arithmetic instruction along with any type
530*09467b48Spatrick /// conversions.
531*09467b48Spatrick static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
532*09467b48Spatrick                                   BasicBlock::iterator End, Loop *L) {
533*09467b48Spatrick   bool seenIncrement = false;
534*09467b48Spatrick   bool MultiExitLoop = false;
535*09467b48Spatrick 
536*09467b48Spatrick   if (!L->getExitingBlock())
537*09467b48Spatrick     MultiExitLoop = true;
538*09467b48Spatrick 
539*09467b48Spatrick   for (BasicBlock::iterator I = Begin; I != End; ++I) {
540*09467b48Spatrick 
541*09467b48Spatrick     if (!isSafeToSpeculativelyExecute(&*I))
542*09467b48Spatrick       return false;
543*09467b48Spatrick 
544*09467b48Spatrick     if (isa<DbgInfoIntrinsic>(I))
545*09467b48Spatrick       continue;
546*09467b48Spatrick 
547*09467b48Spatrick     switch (I->getOpcode()) {
548*09467b48Spatrick     default:
549*09467b48Spatrick       return false;
550*09467b48Spatrick     case Instruction::GetElementPtr:
551*09467b48Spatrick       // GEPs are cheap if all indices are constant.
552*09467b48Spatrick       if (!cast<GEPOperator>(I)->hasAllConstantIndices())
553*09467b48Spatrick         return false;
554*09467b48Spatrick       // fall-thru to increment case
555*09467b48Spatrick       LLVM_FALLTHROUGH;
556*09467b48Spatrick     case Instruction::Add:
557*09467b48Spatrick     case Instruction::Sub:
558*09467b48Spatrick     case Instruction::And:
559*09467b48Spatrick     case Instruction::Or:
560*09467b48Spatrick     case Instruction::Xor:
561*09467b48Spatrick     case Instruction::Shl:
562*09467b48Spatrick     case Instruction::LShr:
563*09467b48Spatrick     case Instruction::AShr: {
564*09467b48Spatrick       Value *IVOpnd =
565*09467b48Spatrick           !isa<Constant>(I->getOperand(0))
566*09467b48Spatrick               ? I->getOperand(0)
567*09467b48Spatrick               : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
568*09467b48Spatrick       if (!IVOpnd)
569*09467b48Spatrick         return false;
570*09467b48Spatrick 
571*09467b48Spatrick       // If increment operand is used outside of the loop, this speculation
572*09467b48Spatrick       // could cause extra live range interference.
573*09467b48Spatrick       if (MultiExitLoop) {
574*09467b48Spatrick         for (User *UseI : IVOpnd->users()) {
575*09467b48Spatrick           auto *UserInst = cast<Instruction>(UseI);
576*09467b48Spatrick           if (!L->contains(UserInst))
577*09467b48Spatrick             return false;
578*09467b48Spatrick         }
579*09467b48Spatrick       }
580*09467b48Spatrick 
581*09467b48Spatrick       if (seenIncrement)
582*09467b48Spatrick         return false;
583*09467b48Spatrick       seenIncrement = true;
584*09467b48Spatrick       break;
585*09467b48Spatrick     }
586*09467b48Spatrick     case Instruction::Trunc:
587*09467b48Spatrick     case Instruction::ZExt:
588*09467b48Spatrick     case Instruction::SExt:
589*09467b48Spatrick       // ignore type conversions
590*09467b48Spatrick       break;
591*09467b48Spatrick     }
592*09467b48Spatrick   }
593*09467b48Spatrick   return true;
594*09467b48Spatrick }
595*09467b48Spatrick 
596*09467b48Spatrick /// Fold the loop tail into the loop exit by speculating the loop tail
597*09467b48Spatrick /// instructions. Typically, this is a single post-increment. In the case of a
598*09467b48Spatrick /// simple 2-block loop, hoisting the increment can be much better than
599*09467b48Spatrick /// duplicating the entire loop header. In the case of loops with early exits,
600*09467b48Spatrick /// rotation will not work anyway, but simplifyLoopLatch will put the loop in
601*09467b48Spatrick /// canonical form so downstream passes can handle it.
602*09467b48Spatrick ///
603*09467b48Spatrick /// I don't believe this invalidates SCEV.
604*09467b48Spatrick bool LoopRotate::simplifyLoopLatch(Loop *L) {
605*09467b48Spatrick   BasicBlock *Latch = L->getLoopLatch();
606*09467b48Spatrick   if (!Latch || Latch->hasAddressTaken())
607*09467b48Spatrick     return false;
608*09467b48Spatrick 
609*09467b48Spatrick   BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
610*09467b48Spatrick   if (!Jmp || !Jmp->isUnconditional())
611*09467b48Spatrick     return false;
612*09467b48Spatrick 
613*09467b48Spatrick   BasicBlock *LastExit = Latch->getSinglePredecessor();
614*09467b48Spatrick   if (!LastExit || !L->isLoopExiting(LastExit))
615*09467b48Spatrick     return false;
616*09467b48Spatrick 
617*09467b48Spatrick   BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
618*09467b48Spatrick   if (!BI)
619*09467b48Spatrick     return false;
620*09467b48Spatrick 
621*09467b48Spatrick   if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
622*09467b48Spatrick     return false;
623*09467b48Spatrick 
624*09467b48Spatrick   LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
625*09467b48Spatrick                     << LastExit->getName() << "\n");
626*09467b48Spatrick 
627*09467b48Spatrick   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
628*09467b48Spatrick   MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
629*09467b48Spatrick                             /*PredecessorWithTwoSuccessors=*/true);
630*09467b48Spatrick 
631*09467b48Spatrick   if (MSSAU && VerifyMemorySSA)
632*09467b48Spatrick     MSSAU->getMemorySSA()->verifyMemorySSA();
633*09467b48Spatrick 
634*09467b48Spatrick   return true;
635*09467b48Spatrick }
636*09467b48Spatrick 
637*09467b48Spatrick /// Rotate \c L, and return true if any modification was made.
638*09467b48Spatrick bool LoopRotate::processLoop(Loop *L) {
639*09467b48Spatrick   // Save the loop metadata.
640*09467b48Spatrick   MDNode *LoopMD = L->getLoopID();
641*09467b48Spatrick 
642*09467b48Spatrick   bool SimplifiedLatch = false;
643*09467b48Spatrick 
644*09467b48Spatrick   // Simplify the loop latch before attempting to rotate the header
645*09467b48Spatrick   // upward. Rotation may not be needed if the loop tail can be folded into the
646*09467b48Spatrick   // loop exit.
647*09467b48Spatrick   if (!RotationOnly)
648*09467b48Spatrick     SimplifiedLatch = simplifyLoopLatch(L);
649*09467b48Spatrick 
650*09467b48Spatrick   bool MadeChange = rotateLoop(L, SimplifiedLatch);
651*09467b48Spatrick   assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
652*09467b48Spatrick          "Loop latch should be exiting after loop-rotate.");
653*09467b48Spatrick 
654*09467b48Spatrick   // Restore the loop metadata.
655*09467b48Spatrick   // NB! We presume LoopRotation DOESN'T ADD its own metadata.
656*09467b48Spatrick   if ((MadeChange || SimplifiedLatch) && LoopMD)
657*09467b48Spatrick     L->setLoopID(LoopMD);
658*09467b48Spatrick 
659*09467b48Spatrick   return MadeChange || SimplifiedLatch;
660*09467b48Spatrick }
661*09467b48Spatrick 
662*09467b48Spatrick 
663*09467b48Spatrick /// The utility to convert a loop into a loop with bottom test.
664*09467b48Spatrick bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
665*09467b48Spatrick                         AssumptionCache *AC, DominatorTree *DT,
666*09467b48Spatrick                         ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
667*09467b48Spatrick                         const SimplifyQuery &SQ, bool RotationOnly = true,
668*09467b48Spatrick                         unsigned Threshold = unsigned(-1),
669*09467b48Spatrick                         bool IsUtilMode = true) {
670*09467b48Spatrick   if (MSSAU && VerifyMemorySSA)
671*09467b48Spatrick     MSSAU->getMemorySSA()->verifyMemorySSA();
672*09467b48Spatrick   LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
673*09467b48Spatrick                 IsUtilMode);
674*09467b48Spatrick   if (MSSAU && VerifyMemorySSA)
675*09467b48Spatrick     MSSAU->getMemorySSA()->verifyMemorySSA();
676*09467b48Spatrick 
677*09467b48Spatrick   return LR.processLoop(L);
678*09467b48Spatrick }
679