109467b48Spatrick //===----------------- LoopRotationUtils.cpp -----------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file provides utilities to convert a loop into a loop with bottom test.
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "llvm/Transforms/Utils/LoopRotationUtils.h"
1409467b48Spatrick #include "llvm/ADT/Statistic.h"
1509467b48Spatrick #include "llvm/Analysis/AssumptionCache.h"
1609467b48Spatrick #include "llvm/Analysis/CodeMetrics.h"
1709467b48Spatrick #include "llvm/Analysis/DomTreeUpdater.h"
1809467b48Spatrick #include "llvm/Analysis/InstructionSimplify.h"
19*d415bd75Srobert #include "llvm/Analysis/LoopInfo.h"
2009467b48Spatrick #include "llvm/Analysis/MemorySSA.h"
2109467b48Spatrick #include "llvm/Analysis/MemorySSAUpdater.h"
2209467b48Spatrick #include "llvm/Analysis/ScalarEvolution.h"
2309467b48Spatrick #include "llvm/Analysis/ValueTracking.h"
2409467b48Spatrick #include "llvm/IR/CFG.h"
2573471bf0Spatrick #include "llvm/IR/DebugInfo.h"
2609467b48Spatrick #include "llvm/IR/Dominators.h"
2709467b48Spatrick #include "llvm/IR/IntrinsicInst.h"
2809467b48Spatrick #include "llvm/Support/CommandLine.h"
2909467b48Spatrick #include "llvm/Support/Debug.h"
3009467b48Spatrick #include "llvm/Support/raw_ostream.h"
3109467b48Spatrick #include "llvm/Transforms/Utils/BasicBlockUtils.h"
3273471bf0Spatrick #include "llvm/Transforms/Utils/Cloning.h"
3309467b48Spatrick #include "llvm/Transforms/Utils/Local.h"
3409467b48Spatrick #include "llvm/Transforms/Utils/SSAUpdater.h"
3509467b48Spatrick #include "llvm/Transforms/Utils/ValueMapper.h"
3609467b48Spatrick using namespace llvm;
3709467b48Spatrick
3809467b48Spatrick #define DEBUG_TYPE "loop-rotate"
3909467b48Spatrick
4073471bf0Spatrick STATISTIC(NumNotRotatedDueToHeaderSize,
4173471bf0Spatrick "Number of loops not rotated due to the header size");
4273471bf0Spatrick STATISTIC(NumInstrsHoisted,
4373471bf0Spatrick "Number of instructions hoisted into loop preheader");
4473471bf0Spatrick STATISTIC(NumInstrsDuplicated,
4573471bf0Spatrick "Number of instructions cloned into loop preheader");
4609467b48Spatrick STATISTIC(NumRotated, "Number of loops rotated");
4709467b48Spatrick
48097a140dSpatrick static cl::opt<bool>
49097a140dSpatrick MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden,
50097a140dSpatrick cl::desc("Allow loop rotation multiple times in order to reach "
51097a140dSpatrick "a better latch exit"));
52097a140dSpatrick
5309467b48Spatrick namespace {
5409467b48Spatrick /// A simple loop rotation transformation.
5509467b48Spatrick class LoopRotate {
5609467b48Spatrick const unsigned MaxHeaderSize;
5709467b48Spatrick LoopInfo *LI;
5809467b48Spatrick const TargetTransformInfo *TTI;
5909467b48Spatrick AssumptionCache *AC;
6009467b48Spatrick DominatorTree *DT;
6109467b48Spatrick ScalarEvolution *SE;
6209467b48Spatrick MemorySSAUpdater *MSSAU;
6309467b48Spatrick const SimplifyQuery &SQ;
6409467b48Spatrick bool RotationOnly;
6509467b48Spatrick bool IsUtilMode;
6673471bf0Spatrick bool PrepareForLTO;
6709467b48Spatrick
6809467b48Spatrick public:
LoopRotate(unsigned MaxHeaderSize,LoopInfo * LI,const TargetTransformInfo * TTI,AssumptionCache * AC,DominatorTree * DT,ScalarEvolution * SE,MemorySSAUpdater * MSSAU,const SimplifyQuery & SQ,bool RotationOnly,bool IsUtilMode,bool PrepareForLTO)6909467b48Spatrick LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
7009467b48Spatrick const TargetTransformInfo *TTI, AssumptionCache *AC,
7109467b48Spatrick DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
7273471bf0Spatrick const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
7373471bf0Spatrick bool PrepareForLTO)
7409467b48Spatrick : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
7509467b48Spatrick MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
7673471bf0Spatrick IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
7709467b48Spatrick bool processLoop(Loop *L);
7809467b48Spatrick
7909467b48Spatrick private:
8009467b48Spatrick bool rotateLoop(Loop *L, bool SimplifiedLatch);
8109467b48Spatrick bool simplifyLoopLatch(Loop *L);
8209467b48Spatrick };
8309467b48Spatrick } // end anonymous namespace
8409467b48Spatrick
8509467b48Spatrick /// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
8609467b48Spatrick /// previously exist in the map, and the value was inserted.
InsertNewValueIntoMap(ValueToValueMapTy & VM,Value * K,Value * V)8709467b48Spatrick static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
8809467b48Spatrick bool Inserted = VM.insert({K, V}).second;
8909467b48Spatrick assert(Inserted);
9009467b48Spatrick (void)Inserted;
9109467b48Spatrick }
9209467b48Spatrick /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
9309467b48Spatrick /// old header into the preheader. If there were uses of the values produced by
9409467b48Spatrick /// these instruction that were outside of the loop, we have to insert PHI nodes
9509467b48Spatrick /// to merge the two values. Do this now.
RewriteUsesOfClonedInstructions(BasicBlock * OrigHeader,BasicBlock * OrigPreheader,ValueToValueMapTy & ValueMap,ScalarEvolution * SE,SmallVectorImpl<PHINode * > * InsertedPHIs)9609467b48Spatrick static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
9709467b48Spatrick BasicBlock *OrigPreheader,
9809467b48Spatrick ValueToValueMapTy &ValueMap,
99*d415bd75Srobert ScalarEvolution *SE,
10009467b48Spatrick SmallVectorImpl<PHINode*> *InsertedPHIs) {
10109467b48Spatrick // Remove PHI node entries that are no longer live.
10209467b48Spatrick BasicBlock::iterator I, E = OrigHeader->end();
10309467b48Spatrick for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
10409467b48Spatrick PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
10509467b48Spatrick
10609467b48Spatrick // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
10709467b48Spatrick // as necessary.
10809467b48Spatrick SSAUpdater SSA(InsertedPHIs);
10909467b48Spatrick for (I = OrigHeader->begin(); I != E; ++I) {
11009467b48Spatrick Value *OrigHeaderVal = &*I;
11109467b48Spatrick
11209467b48Spatrick // If there are no uses of the value (e.g. because it returns void), there
11309467b48Spatrick // is nothing to rewrite.
11409467b48Spatrick if (OrigHeaderVal->use_empty())
11509467b48Spatrick continue;
11609467b48Spatrick
11709467b48Spatrick Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
11809467b48Spatrick
11909467b48Spatrick // The value now exits in two versions: the initial value in the preheader
12009467b48Spatrick // and the loop "next" value in the original header.
12109467b48Spatrick SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
122*d415bd75Srobert // Force re-computation of OrigHeaderVal, as some users now need to use the
123*d415bd75Srobert // new PHI node.
124*d415bd75Srobert if (SE)
125*d415bd75Srobert SE->forgetValue(OrigHeaderVal);
12609467b48Spatrick SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
12709467b48Spatrick SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
12809467b48Spatrick
12909467b48Spatrick // Visit each use of the OrigHeader instruction.
130*d415bd75Srobert for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) {
13109467b48Spatrick // SSAUpdater can't handle a non-PHI use in the same block as an
13209467b48Spatrick // earlier def. We can easily handle those cases manually.
13309467b48Spatrick Instruction *UserInst = cast<Instruction>(U.getUser());
13409467b48Spatrick if (!isa<PHINode>(UserInst)) {
13509467b48Spatrick BasicBlock *UserBB = UserInst->getParent();
13609467b48Spatrick
13709467b48Spatrick // The original users in the OrigHeader are already using the
13809467b48Spatrick // original definitions.
13909467b48Spatrick if (UserBB == OrigHeader)
14009467b48Spatrick continue;
14109467b48Spatrick
14209467b48Spatrick // Users in the OrigPreHeader need to use the value to which the
14309467b48Spatrick // original definitions are mapped.
14409467b48Spatrick if (UserBB == OrigPreheader) {
14509467b48Spatrick U = OrigPreHeaderVal;
14609467b48Spatrick continue;
14709467b48Spatrick }
14809467b48Spatrick }
14909467b48Spatrick
15009467b48Spatrick // Anything else can be handled by SSAUpdater.
15109467b48Spatrick SSA.RewriteUse(U);
15209467b48Spatrick }
15309467b48Spatrick
15409467b48Spatrick // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
15509467b48Spatrick // intrinsics.
15609467b48Spatrick SmallVector<DbgValueInst *, 1> DbgValues;
15709467b48Spatrick llvm::findDbgValues(DbgValues, OrigHeaderVal);
15809467b48Spatrick for (auto &DbgValue : DbgValues) {
15909467b48Spatrick // The original users in the OrigHeader are already using the original
16009467b48Spatrick // definitions.
16109467b48Spatrick BasicBlock *UserBB = DbgValue->getParent();
16209467b48Spatrick if (UserBB == OrigHeader)
16309467b48Spatrick continue;
16409467b48Spatrick
16509467b48Spatrick // Users in the OrigPreHeader need to use the value to which the
16609467b48Spatrick // original definitions are mapped and anything else can be handled by
16709467b48Spatrick // the SSAUpdater. To avoid adding PHINodes, check if the value is
16809467b48Spatrick // available in UserBB, if not substitute undef.
16909467b48Spatrick Value *NewVal;
17009467b48Spatrick if (UserBB == OrigPreheader)
17109467b48Spatrick NewVal = OrigPreHeaderVal;
17209467b48Spatrick else if (SSA.HasValueForBlock(UserBB))
17309467b48Spatrick NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
17409467b48Spatrick else
17509467b48Spatrick NewVal = UndefValue::get(OrigHeaderVal->getType());
17673471bf0Spatrick DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
17709467b48Spatrick }
17809467b48Spatrick }
17909467b48Spatrick }
18009467b48Spatrick
181097a140dSpatrick // Assuming both header and latch are exiting, look for a phi which is only
182097a140dSpatrick // used outside the loop (via a LCSSA phi) in the exit from the header.
183097a140dSpatrick // This means that rotating the loop can remove the phi.
profitableToRotateLoopExitingLatch(Loop * L)184097a140dSpatrick static bool profitableToRotateLoopExitingLatch(Loop *L) {
18509467b48Spatrick BasicBlock *Header = L->getHeader();
186097a140dSpatrick BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator());
187097a140dSpatrick assert(BI && BI->isConditional() && "need header with conditional exit");
188097a140dSpatrick BasicBlock *HeaderExit = BI->getSuccessor(0);
18909467b48Spatrick if (L->contains(HeaderExit))
190097a140dSpatrick HeaderExit = BI->getSuccessor(1);
19109467b48Spatrick
19209467b48Spatrick for (auto &Phi : Header->phis()) {
19309467b48Spatrick // Look for uses of this phi in the loop/via exits other than the header.
19409467b48Spatrick if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
19509467b48Spatrick return cast<Instruction>(U)->getParent() != HeaderExit;
19609467b48Spatrick }))
19709467b48Spatrick continue;
19809467b48Spatrick return true;
19909467b48Spatrick }
200097a140dSpatrick return false;
201097a140dSpatrick }
20209467b48Spatrick
203097a140dSpatrick // Check that latch exit is deoptimizing (which means - very unlikely to happen)
204097a140dSpatrick // and there is another exit from the loop which is non-deoptimizing.
205097a140dSpatrick // If we rotate latch to that exit our loop has a better chance of being fully
206097a140dSpatrick // canonical.
207097a140dSpatrick //
208097a140dSpatrick // It can give false positives in some rare cases.
canRotateDeoptimizingLatchExit(Loop * L)209097a140dSpatrick static bool canRotateDeoptimizingLatchExit(Loop *L) {
210097a140dSpatrick BasicBlock *Latch = L->getLoopLatch();
211097a140dSpatrick assert(Latch && "need latch");
212097a140dSpatrick BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
213097a140dSpatrick // Need normal exiting latch.
214097a140dSpatrick if (!BI || !BI->isConditional())
215097a140dSpatrick return false;
216097a140dSpatrick
217097a140dSpatrick BasicBlock *Exit = BI->getSuccessor(1);
218097a140dSpatrick if (L->contains(Exit))
219097a140dSpatrick Exit = BI->getSuccessor(0);
220097a140dSpatrick
221097a140dSpatrick // Latch exit is non-deoptimizing, no need to rotate.
222097a140dSpatrick if (!Exit->getPostdominatingDeoptimizeCall())
223097a140dSpatrick return false;
224097a140dSpatrick
225097a140dSpatrick SmallVector<BasicBlock *, 4> Exits;
226097a140dSpatrick L->getUniqueExitBlocks(Exits);
227097a140dSpatrick if (!Exits.empty()) {
228097a140dSpatrick // There is at least one non-deoptimizing exit.
229097a140dSpatrick //
230097a140dSpatrick // Note, that BasicBlock::getPostdominatingDeoptimizeCall is not exact,
231097a140dSpatrick // as it can conservatively return false for deoptimizing exits with
232097a140dSpatrick // complex enough control flow down to deoptimize call.
233097a140dSpatrick //
234097a140dSpatrick // That means here we can report success for a case where
235097a140dSpatrick // all exits are deoptimizing but one of them has complex enough
236097a140dSpatrick // control flow (e.g. with loops).
237097a140dSpatrick //
238097a140dSpatrick // That should be a very rare case and false positives for this function
239097a140dSpatrick // have compile-time effect only.
240097a140dSpatrick return any_of(Exits, [](const BasicBlock *BB) {
241097a140dSpatrick return !BB->getPostdominatingDeoptimizeCall();
242097a140dSpatrick });
243097a140dSpatrick }
24409467b48Spatrick return false;
24509467b48Spatrick }
24609467b48Spatrick
24709467b48Spatrick /// Rotate loop LP. Return true if the loop is rotated.
24809467b48Spatrick ///
24909467b48Spatrick /// \param SimplifiedLatch is true if the latch was just folded into the final
25009467b48Spatrick /// loop exit. In this case we may want to rotate even though the new latch is
25109467b48Spatrick /// now an exiting branch. This rotation would have happened had the latch not
25209467b48Spatrick /// been simplified. However, if SimplifiedLatch is false, then we avoid
25309467b48Spatrick /// rotating loops in which the latch exits to avoid excessive or endless
25409467b48Spatrick /// rotation. LoopRotate should be repeatable and converge to a canonical
25509467b48Spatrick /// form. This property is satisfied because simplifying the loop latch can only
25609467b48Spatrick /// happen once across multiple invocations of the LoopRotate pass.
257097a140dSpatrick ///
258097a140dSpatrick /// If -loop-rotate-multi is enabled we can do multiple rotations in one go
259097a140dSpatrick /// so to reach a suitable (non-deoptimizing) exit.
rotateLoop(Loop * L,bool SimplifiedLatch)26009467b48Spatrick bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
26109467b48Spatrick // If the loop has only one block then there is not much to rotate.
26209467b48Spatrick if (L->getBlocks().size() == 1)
26309467b48Spatrick return false;
26409467b48Spatrick
265097a140dSpatrick bool Rotated = false;
266097a140dSpatrick do {
26709467b48Spatrick BasicBlock *OrigHeader = L->getHeader();
26809467b48Spatrick BasicBlock *OrigLatch = L->getLoopLatch();
26909467b48Spatrick
27009467b48Spatrick BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
27109467b48Spatrick if (!BI || BI->isUnconditional())
272097a140dSpatrick return Rotated;
27309467b48Spatrick
27409467b48Spatrick // If the loop header is not one of the loop exiting blocks then
27509467b48Spatrick // either this loop is already rotated or it is not
27609467b48Spatrick // suitable for loop rotation transformations.
27709467b48Spatrick if (!L->isLoopExiting(OrigHeader))
278097a140dSpatrick return Rotated;
27909467b48Spatrick
28009467b48Spatrick // If the loop latch already contains a branch that leaves the loop then the
28109467b48Spatrick // loop is already rotated.
28209467b48Spatrick if (!OrigLatch)
283097a140dSpatrick return Rotated;
28409467b48Spatrick
28509467b48Spatrick // Rotate if either the loop latch does *not* exit the loop, or if the loop
28609467b48Spatrick // latch was just simplified. Or if we think it will be profitable.
28709467b48Spatrick if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
288097a140dSpatrick !profitableToRotateLoopExitingLatch(L) &&
289097a140dSpatrick !canRotateDeoptimizingLatchExit(L))
290097a140dSpatrick return Rotated;
29109467b48Spatrick
29209467b48Spatrick // Check size of original header and reject loop if it is very big or we can't
29309467b48Spatrick // duplicate blocks inside it.
29409467b48Spatrick {
29509467b48Spatrick SmallPtrSet<const Value *, 32> EphValues;
29609467b48Spatrick CodeMetrics::collectEphemeralValues(L, AC, EphValues);
29709467b48Spatrick
29809467b48Spatrick CodeMetrics Metrics;
29973471bf0Spatrick Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
30009467b48Spatrick if (Metrics.notDuplicatable) {
30109467b48Spatrick LLVM_DEBUG(
30209467b48Spatrick dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
30309467b48Spatrick << " instructions: ";
30409467b48Spatrick L->dump());
305097a140dSpatrick return Rotated;
30609467b48Spatrick }
30709467b48Spatrick if (Metrics.convergent) {
30809467b48Spatrick LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
30909467b48Spatrick "instructions: ";
31009467b48Spatrick L->dump());
311097a140dSpatrick return Rotated;
31209467b48Spatrick }
313*d415bd75Srobert if (!Metrics.NumInsts.isValid()) {
314*d415bd75Srobert LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
315*d415bd75Srobert " with invalid cost: ";
316*d415bd75Srobert L->dump());
317*d415bd75Srobert return Rotated;
318*d415bd75Srobert }
319097a140dSpatrick if (Metrics.NumInsts > MaxHeaderSize) {
320097a140dSpatrick LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
321097a140dSpatrick << Metrics.NumInsts
322097a140dSpatrick << " instructions, which is more than the threshold ("
323097a140dSpatrick << MaxHeaderSize << " instructions): ";
324097a140dSpatrick L->dump());
32573471bf0Spatrick ++NumNotRotatedDueToHeaderSize;
326097a140dSpatrick return Rotated;
327097a140dSpatrick }
32873471bf0Spatrick
32973471bf0Spatrick // When preparing for LTO, avoid rotating loops with calls that could be
33073471bf0Spatrick // inlined during the LTO stage.
33173471bf0Spatrick if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
33273471bf0Spatrick return Rotated;
33309467b48Spatrick }
33409467b48Spatrick
33509467b48Spatrick // Now, this loop is suitable for rotation.
33609467b48Spatrick BasicBlock *OrigPreheader = L->getLoopPreheader();
33709467b48Spatrick
33809467b48Spatrick // If the loop could not be converted to canonical form, it must have an
33909467b48Spatrick // indirectbr in it, just give up.
34009467b48Spatrick if (!OrigPreheader || !L->hasDedicatedExits())
341097a140dSpatrick return Rotated;
34209467b48Spatrick
34309467b48Spatrick // Anything ScalarEvolution may know about this loop or the PHI nodes
34409467b48Spatrick // in its header will soon be invalidated. We should also invalidate
34509467b48Spatrick // all outer loops because insertion and deletion of blocks that happens
34609467b48Spatrick // during the rotation may violate invariants related to backedge taken
34709467b48Spatrick // infos in them.
348*d415bd75Srobert if (SE) {
34909467b48Spatrick SE->forgetTopmostLoop(L);
350*d415bd75Srobert // We may hoist some instructions out of loop. In case if they were cached
351*d415bd75Srobert // as "loop variant" or "loop computable", these caches must be dropped.
352*d415bd75Srobert // We also may fold basic blocks, so cached block dispositions also need
353*d415bd75Srobert // to be dropped.
354*d415bd75Srobert SE->forgetBlockAndLoopDispositions();
355*d415bd75Srobert }
35609467b48Spatrick
35709467b48Spatrick LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
35809467b48Spatrick if (MSSAU && VerifyMemorySSA)
35909467b48Spatrick MSSAU->getMemorySSA()->verifyMemorySSA();
36009467b48Spatrick
36109467b48Spatrick // Find new Loop header. NewHeader is a Header's one and only successor
36209467b48Spatrick // that is inside loop. Header's other successor is outside the
36309467b48Spatrick // loop. Otherwise loop is not suitable for rotation.
36409467b48Spatrick BasicBlock *Exit = BI->getSuccessor(0);
36509467b48Spatrick BasicBlock *NewHeader = BI->getSuccessor(1);
36609467b48Spatrick if (L->contains(Exit))
36709467b48Spatrick std::swap(Exit, NewHeader);
36809467b48Spatrick assert(NewHeader && "Unable to determine new loop header");
36909467b48Spatrick assert(L->contains(NewHeader) && !L->contains(Exit) &&
37009467b48Spatrick "Unable to determine loop header and exit blocks");
37109467b48Spatrick
37209467b48Spatrick // This code assumes that the new header has exactly one predecessor.
37309467b48Spatrick // Remove any single-entry PHI nodes in it.
37409467b48Spatrick assert(NewHeader->getSinglePredecessor() &&
37509467b48Spatrick "New header doesn't have one pred!");
37609467b48Spatrick FoldSingleEntryPHINodes(NewHeader);
37709467b48Spatrick
37809467b48Spatrick // Begin by walking OrigHeader and populating ValueMap with an entry for
37909467b48Spatrick // each Instruction.
38009467b48Spatrick BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
38109467b48Spatrick ValueToValueMapTy ValueMap, ValueMapMSSA;
38209467b48Spatrick
38309467b48Spatrick // For PHI nodes, the value available in OldPreHeader is just the
38409467b48Spatrick // incoming value from OldPreHeader.
38509467b48Spatrick for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
38609467b48Spatrick InsertNewValueIntoMap(ValueMap, PN,
38709467b48Spatrick PN->getIncomingValueForBlock(OrigPreheader));
38809467b48Spatrick
38909467b48Spatrick // For the rest of the instructions, either hoist to the OrigPreheader if
39009467b48Spatrick // possible or create a clone in the OldPreHeader if not.
39109467b48Spatrick Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
39209467b48Spatrick
39373471bf0Spatrick // Record all debug intrinsics preceding LoopEntryBranch to avoid
39473471bf0Spatrick // duplication.
39509467b48Spatrick using DbgIntrinsicHash =
39673471bf0Spatrick std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>;
39709467b48Spatrick auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
39873471bf0Spatrick auto VarLocOps = D->location_ops();
39973471bf0Spatrick return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()),
40073471bf0Spatrick D->getVariable()},
40173471bf0Spatrick D->getExpression()};
40209467b48Spatrick };
40309467b48Spatrick SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
404*d415bd75Srobert for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) {
405*d415bd75Srobert if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
40609467b48Spatrick DbgIntrinsics.insert(makeHash(DII));
40709467b48Spatrick else
40809467b48Spatrick break;
40909467b48Spatrick }
41009467b48Spatrick
41173471bf0Spatrick // Remember the local noalias scope declarations in the header. After the
41273471bf0Spatrick // rotation, they must be duplicated and the scope must be cloned. This
41373471bf0Spatrick // avoids unwanted interaction across iterations.
41473471bf0Spatrick SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions;
41573471bf0Spatrick for (Instruction &I : *OrigHeader)
41673471bf0Spatrick if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
41773471bf0Spatrick NoAliasDeclInstructions.push_back(Decl);
41873471bf0Spatrick
41909467b48Spatrick while (I != E) {
42009467b48Spatrick Instruction *Inst = &*I++;
42109467b48Spatrick
42209467b48Spatrick // If the instruction's operands are invariant and it doesn't read or write
42309467b48Spatrick // memory, then it is safe to hoist. Doing this doesn't change the order of
42409467b48Spatrick // execution in the preheader, but does prevent the instruction from
42509467b48Spatrick // executing in each iteration of the loop. This means it is safe to hoist
42609467b48Spatrick // something that might trap, but isn't safe to hoist something that reads
42709467b48Spatrick // memory (without proving that the loop doesn't write).
42809467b48Spatrick if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
42909467b48Spatrick !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
43009467b48Spatrick !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
43109467b48Spatrick Inst->moveBefore(LoopEntryBranch);
43273471bf0Spatrick ++NumInstrsHoisted;
43309467b48Spatrick continue;
43409467b48Spatrick }
43509467b48Spatrick
43609467b48Spatrick // Otherwise, create a duplicate of the instruction.
43709467b48Spatrick Instruction *C = Inst->clone();
43873471bf0Spatrick ++NumInstrsDuplicated;
43909467b48Spatrick
44009467b48Spatrick // Eagerly remap the operands of the instruction.
44109467b48Spatrick RemapInstruction(C, ValueMap,
44209467b48Spatrick RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
44309467b48Spatrick
44409467b48Spatrick // Avoid inserting the same intrinsic twice.
44509467b48Spatrick if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
44609467b48Spatrick if (DbgIntrinsics.count(makeHash(DII))) {
44709467b48Spatrick C->deleteValue();
44809467b48Spatrick continue;
44909467b48Spatrick }
45009467b48Spatrick
45109467b48Spatrick // With the operands remapped, see if the instruction constant folds or is
45209467b48Spatrick // otherwise simplifyable. This commonly occurs because the entry from PHI
45309467b48Spatrick // nodes allows icmps and other instructions to fold.
454*d415bd75Srobert Value *V = simplifyInstruction(C, SQ);
45509467b48Spatrick if (V && LI->replacementPreservesLCSSAForm(C, V)) {
45609467b48Spatrick // If so, then delete the temporary instruction and stick the folded value
45709467b48Spatrick // in the map.
45809467b48Spatrick InsertNewValueIntoMap(ValueMap, Inst, V);
45909467b48Spatrick if (!C->mayHaveSideEffects()) {
46009467b48Spatrick C->deleteValue();
46109467b48Spatrick C = nullptr;
46209467b48Spatrick }
46309467b48Spatrick } else {
46409467b48Spatrick InsertNewValueIntoMap(ValueMap, Inst, C);
46509467b48Spatrick }
46609467b48Spatrick if (C) {
46709467b48Spatrick // Otherwise, stick the new instruction into the new block!
46809467b48Spatrick C->setName(Inst->getName());
46909467b48Spatrick C->insertBefore(LoopEntryBranch);
47009467b48Spatrick
47173471bf0Spatrick if (auto *II = dyn_cast<AssumeInst>(C))
47209467b48Spatrick AC->registerAssumption(II);
47309467b48Spatrick // MemorySSA cares whether the cloned instruction was inserted or not, and
47409467b48Spatrick // not whether it can be remapped to a simplified value.
47509467b48Spatrick if (MSSAU)
47609467b48Spatrick InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
47709467b48Spatrick }
47809467b48Spatrick }
47909467b48Spatrick
48073471bf0Spatrick if (!NoAliasDeclInstructions.empty()) {
48173471bf0Spatrick // There are noalias scope declarations:
48273471bf0Spatrick // (general):
48373471bf0Spatrick // Original: OrigPre { OrigHeader NewHeader ... Latch }
48473471bf0Spatrick // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
48573471bf0Spatrick //
48673471bf0Spatrick // with D: llvm.experimental.noalias.scope.decl,
48773471bf0Spatrick // U: !noalias or !alias.scope depending on D
48873471bf0Spatrick // ... { D U1 U2 } can transform into:
48973471bf0Spatrick // (0) : ... { D U1 U2 } // no relevant rotation for this part
49073471bf0Spatrick // (1) : ... D' { U1 U2 D } // D is part of OrigHeader
49173471bf0Spatrick // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
49273471bf0Spatrick //
49373471bf0Spatrick // We now want to transform:
49473471bf0Spatrick // (1) -> : ... D' { D U1 U2 D'' }
49573471bf0Spatrick // (2) -> : ... D' U1' { D U2 D'' U1'' }
49673471bf0Spatrick // D: original llvm.experimental.noalias.scope.decl
49773471bf0Spatrick // D', U1': duplicate with replaced scopes
49873471bf0Spatrick // D'', U1'': different duplicate with replaced scopes
49973471bf0Spatrick // This ensures a safe fallback to 'may_alias' introduced by the rotate,
50073471bf0Spatrick // as U1'' and U1' scopes will not be compatible wrt to the local restrict
50173471bf0Spatrick
50273471bf0Spatrick // Clone the llvm.experimental.noalias.decl again for the NewHeader.
50373471bf0Spatrick Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
50473471bf0Spatrick for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
50573471bf0Spatrick LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
50673471bf0Spatrick << *NAD << "\n");
50773471bf0Spatrick Instruction *NewNAD = NAD->clone();
50873471bf0Spatrick NewNAD->insertBefore(NewHeaderInsertionPoint);
50973471bf0Spatrick }
51073471bf0Spatrick
51173471bf0Spatrick // Scopes must now be duplicated, once for OrigHeader and once for
51273471bf0Spatrick // OrigPreHeader'.
51373471bf0Spatrick {
51473471bf0Spatrick auto &Context = NewHeader->getContext();
51573471bf0Spatrick
51673471bf0Spatrick SmallVector<MDNode *, 8> NoAliasDeclScopes;
51773471bf0Spatrick for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
51873471bf0Spatrick NoAliasDeclScopes.push_back(NAD->getScopeList());
51973471bf0Spatrick
52073471bf0Spatrick LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n");
52173471bf0Spatrick cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
52273471bf0Spatrick "h.rot");
52373471bf0Spatrick LLVM_DEBUG(OrigHeader->dump());
52473471bf0Spatrick
52573471bf0Spatrick // Keep the compile time impact low by only adapting the inserted block
52673471bf0Spatrick // of instructions in the OrigPreHeader. This might result in slightly
52773471bf0Spatrick // more aliasing between these instructions and those that were already
52873471bf0Spatrick // present, but it will be much faster when the original PreHeader is
52973471bf0Spatrick // large.
53073471bf0Spatrick LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n");
53173471bf0Spatrick auto *FirstDecl =
53273471bf0Spatrick cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
53373471bf0Spatrick auto *LastInst = &OrigPreheader->back();
53473471bf0Spatrick cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
53573471bf0Spatrick Context, "pre.rot");
53673471bf0Spatrick LLVM_DEBUG(OrigPreheader->dump());
53773471bf0Spatrick
53873471bf0Spatrick LLVM_DEBUG(dbgs() << " Updated NewHeader:\n");
53973471bf0Spatrick LLVM_DEBUG(NewHeader->dump());
54073471bf0Spatrick }
54173471bf0Spatrick }
54273471bf0Spatrick
54309467b48Spatrick // Along with all the other instructions, we just cloned OrigHeader's
54409467b48Spatrick // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
54509467b48Spatrick // successors by duplicating their incoming values for OrigHeader.
54609467b48Spatrick for (BasicBlock *SuccBB : successors(OrigHeader))
54709467b48Spatrick for (BasicBlock::iterator BI = SuccBB->begin();
54809467b48Spatrick PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
54909467b48Spatrick PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
55009467b48Spatrick
55109467b48Spatrick // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
55209467b48Spatrick // OrigPreHeader's old terminator (the original branch into the loop), and
55309467b48Spatrick // remove the corresponding incoming values from the PHI nodes in OrigHeader.
55409467b48Spatrick LoopEntryBranch->eraseFromParent();
55509467b48Spatrick
55609467b48Spatrick // Update MemorySSA before the rewrite call below changes the 1:1
55709467b48Spatrick // instruction:cloned_instruction_or_value mapping.
55809467b48Spatrick if (MSSAU) {
55909467b48Spatrick InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
56009467b48Spatrick MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
56109467b48Spatrick ValueMapMSSA);
56209467b48Spatrick }
56309467b48Spatrick
56409467b48Spatrick SmallVector<PHINode*, 2> InsertedPHIs;
56509467b48Spatrick // If there were any uses of instructions in the duplicated block outside the
56609467b48Spatrick // loop, update them, inserting PHI nodes as required
567*d415bd75Srobert RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE,
56809467b48Spatrick &InsertedPHIs);
56909467b48Spatrick
57009467b48Spatrick // Attach dbg.value intrinsics to the new phis if that phi uses a value that
57109467b48Spatrick // previously had debug metadata attached. This keeps the debug info
57209467b48Spatrick // up-to-date in the loop body.
57309467b48Spatrick if (!InsertedPHIs.empty())
57409467b48Spatrick insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
57509467b48Spatrick
57609467b48Spatrick // NewHeader is now the header of the loop.
57709467b48Spatrick L->moveToHeader(NewHeader);
57809467b48Spatrick assert(L->getHeader() == NewHeader && "Latch block is our new header");
57909467b48Spatrick
58009467b48Spatrick // Inform DT about changes to the CFG.
58109467b48Spatrick if (DT) {
58209467b48Spatrick // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
58309467b48Spatrick // the DT about the removed edge to the OrigHeader (that got removed).
58409467b48Spatrick SmallVector<DominatorTree::UpdateType, 3> Updates;
58509467b48Spatrick Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
58609467b48Spatrick Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
58709467b48Spatrick Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
58809467b48Spatrick
58909467b48Spatrick if (MSSAU) {
59073471bf0Spatrick MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
59109467b48Spatrick if (VerifyMemorySSA)
59209467b48Spatrick MSSAU->getMemorySSA()->verifyMemorySSA();
59373471bf0Spatrick } else {
59473471bf0Spatrick DT->applyUpdates(Updates);
59509467b48Spatrick }
59609467b48Spatrick }
59709467b48Spatrick
59809467b48Spatrick // At this point, we've finished our major CFG changes. As part of cloning
59909467b48Spatrick // the loop into the preheader we've simplified instructions and the
60009467b48Spatrick // duplicated conditional branch may now be branching on a constant. If it is
60109467b48Spatrick // branching on a constant and if that constant means that we enter the loop,
60209467b48Spatrick // then we fold away the cond branch to an uncond branch. This simplifies the
60309467b48Spatrick // loop in cases important for nested loops, and it also means we don't have
60409467b48Spatrick // to split as many edges.
60509467b48Spatrick BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
60609467b48Spatrick assert(PHBI->isConditional() && "Should be clone of BI condbr!");
60709467b48Spatrick if (!isa<ConstantInt>(PHBI->getCondition()) ||
60809467b48Spatrick PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
60909467b48Spatrick NewHeader) {
61009467b48Spatrick // The conditional branch can't be folded, handle the general case.
61109467b48Spatrick // Split edges as necessary to preserve LoopSimplify form.
61209467b48Spatrick
61309467b48Spatrick // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
61409467b48Spatrick // thus is not a preheader anymore.
61509467b48Spatrick // Split the edge to form a real preheader.
61609467b48Spatrick BasicBlock *NewPH = SplitCriticalEdge(
61709467b48Spatrick OrigPreheader, NewHeader,
61809467b48Spatrick CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
61909467b48Spatrick NewPH->setName(NewHeader->getName() + ".lr.ph");
62009467b48Spatrick
62109467b48Spatrick // Preserve canonical loop form, which means that 'Exit' should have only
62209467b48Spatrick // one predecessor. Note that Exit could be an exit block for multiple
62309467b48Spatrick // nested loops, causing both of the edges to now be critical and need to
62409467b48Spatrick // be split.
625*d415bd75Srobert SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit));
62609467b48Spatrick bool SplitLatchEdge = false;
62709467b48Spatrick for (BasicBlock *ExitPred : ExitPreds) {
62809467b48Spatrick // We only need to split loop exit edges.
62909467b48Spatrick Loop *PredLoop = LI->getLoopFor(ExitPred);
63009467b48Spatrick if (!PredLoop || PredLoop->contains(Exit) ||
631*d415bd75Srobert isa<IndirectBrInst>(ExitPred->getTerminator()))
63209467b48Spatrick continue;
63309467b48Spatrick SplitLatchEdge |= L->getLoopLatch() == ExitPred;
63409467b48Spatrick BasicBlock *ExitSplit = SplitCriticalEdge(
63509467b48Spatrick ExitPred, Exit,
63609467b48Spatrick CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
63709467b48Spatrick ExitSplit->moveBefore(Exit);
63809467b48Spatrick }
63909467b48Spatrick assert(SplitLatchEdge &&
64009467b48Spatrick "Despite splitting all preds, failed to split latch exit?");
64173471bf0Spatrick (void)SplitLatchEdge;
64209467b48Spatrick } else {
64309467b48Spatrick // We can fold the conditional branch in the preheader, this makes things
64409467b48Spatrick // simpler. The first step is to remove the extra edge to the Exit block.
64509467b48Spatrick Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
64609467b48Spatrick BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
64709467b48Spatrick NewBI->setDebugLoc(PHBI->getDebugLoc());
64809467b48Spatrick PHBI->eraseFromParent();
64909467b48Spatrick
65009467b48Spatrick // With our CFG finalized, update DomTree if it is available.
65109467b48Spatrick if (DT) DT->deleteEdge(OrigPreheader, Exit);
65209467b48Spatrick
65309467b48Spatrick // Update MSSA too, if available.
65409467b48Spatrick if (MSSAU)
65509467b48Spatrick MSSAU->removeEdge(OrigPreheader, Exit);
65609467b48Spatrick }
65709467b48Spatrick
65809467b48Spatrick assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
65909467b48Spatrick assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
66009467b48Spatrick
66109467b48Spatrick if (MSSAU && VerifyMemorySSA)
66209467b48Spatrick MSSAU->getMemorySSA()->verifyMemorySSA();
66309467b48Spatrick
66409467b48Spatrick // Now that the CFG and DomTree are in a consistent state again, try to merge
66509467b48Spatrick // the OrigHeader block into OrigLatch. This will succeed if they are
66609467b48Spatrick // connected by an unconditional branch. This is just a cleanup so the
66709467b48Spatrick // emitted code isn't too gross in this common case.
66809467b48Spatrick DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
66973471bf0Spatrick BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
67073471bf0Spatrick bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
67173471bf0Spatrick if (DidMerge)
67273471bf0Spatrick RemoveRedundantDbgInstrs(PredBB);
67309467b48Spatrick
67409467b48Spatrick if (MSSAU && VerifyMemorySSA)
67509467b48Spatrick MSSAU->getMemorySSA()->verifyMemorySSA();
67609467b48Spatrick
67709467b48Spatrick LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
67809467b48Spatrick
67909467b48Spatrick ++NumRotated;
680097a140dSpatrick
681097a140dSpatrick Rotated = true;
682097a140dSpatrick SimplifiedLatch = false;
683097a140dSpatrick
684097a140dSpatrick // Check that new latch is a deoptimizing exit and then repeat rotation if possible.
685097a140dSpatrick // Deoptimizing latch exit is not a generally typical case, so we just loop over.
686097a140dSpatrick // TODO: if it becomes a performance bottleneck extend rotation algorithm
687097a140dSpatrick // to handle multiple rotations in one go.
688097a140dSpatrick } while (MultiRotate && canRotateDeoptimizingLatchExit(L));
689097a140dSpatrick
690097a140dSpatrick
69109467b48Spatrick return true;
69209467b48Spatrick }
69309467b48Spatrick
69409467b48Spatrick /// Determine whether the instructions in this range may be safely and cheaply
69509467b48Spatrick /// speculated. This is not an important enough situation to develop complex
69609467b48Spatrick /// heuristics. We handle a single arithmetic instruction along with any type
69709467b48Spatrick /// conversions.
shouldSpeculateInstrs(BasicBlock::iterator Begin,BasicBlock::iterator End,Loop * L)69809467b48Spatrick static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
69909467b48Spatrick BasicBlock::iterator End, Loop *L) {
70009467b48Spatrick bool seenIncrement = false;
70109467b48Spatrick bool MultiExitLoop = false;
70209467b48Spatrick
70309467b48Spatrick if (!L->getExitingBlock())
70409467b48Spatrick MultiExitLoop = true;
70509467b48Spatrick
70609467b48Spatrick for (BasicBlock::iterator I = Begin; I != End; ++I) {
70709467b48Spatrick
70809467b48Spatrick if (!isSafeToSpeculativelyExecute(&*I))
70909467b48Spatrick return false;
71009467b48Spatrick
71109467b48Spatrick if (isa<DbgInfoIntrinsic>(I))
71209467b48Spatrick continue;
71309467b48Spatrick
71409467b48Spatrick switch (I->getOpcode()) {
71509467b48Spatrick default:
71609467b48Spatrick return false;
71709467b48Spatrick case Instruction::GetElementPtr:
71809467b48Spatrick // GEPs are cheap if all indices are constant.
71909467b48Spatrick if (!cast<GEPOperator>(I)->hasAllConstantIndices())
72009467b48Spatrick return false;
72109467b48Spatrick // fall-thru to increment case
722*d415bd75Srobert [[fallthrough]];
72309467b48Spatrick case Instruction::Add:
72409467b48Spatrick case Instruction::Sub:
72509467b48Spatrick case Instruction::And:
72609467b48Spatrick case Instruction::Or:
72709467b48Spatrick case Instruction::Xor:
72809467b48Spatrick case Instruction::Shl:
72909467b48Spatrick case Instruction::LShr:
73009467b48Spatrick case Instruction::AShr: {
73109467b48Spatrick Value *IVOpnd =
73209467b48Spatrick !isa<Constant>(I->getOperand(0))
73309467b48Spatrick ? I->getOperand(0)
73409467b48Spatrick : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
73509467b48Spatrick if (!IVOpnd)
73609467b48Spatrick return false;
73709467b48Spatrick
73809467b48Spatrick // If increment operand is used outside of the loop, this speculation
73909467b48Spatrick // could cause extra live range interference.
74009467b48Spatrick if (MultiExitLoop) {
74109467b48Spatrick for (User *UseI : IVOpnd->users()) {
74209467b48Spatrick auto *UserInst = cast<Instruction>(UseI);
74309467b48Spatrick if (!L->contains(UserInst))
74409467b48Spatrick return false;
74509467b48Spatrick }
74609467b48Spatrick }
74709467b48Spatrick
74809467b48Spatrick if (seenIncrement)
74909467b48Spatrick return false;
75009467b48Spatrick seenIncrement = true;
75109467b48Spatrick break;
75209467b48Spatrick }
75309467b48Spatrick case Instruction::Trunc:
75409467b48Spatrick case Instruction::ZExt:
75509467b48Spatrick case Instruction::SExt:
75609467b48Spatrick // ignore type conversions
75709467b48Spatrick break;
75809467b48Spatrick }
75909467b48Spatrick }
76009467b48Spatrick return true;
76109467b48Spatrick }
76209467b48Spatrick
76309467b48Spatrick /// Fold the loop tail into the loop exit by speculating the loop tail
76409467b48Spatrick /// instructions. Typically, this is a single post-increment. In the case of a
76509467b48Spatrick /// simple 2-block loop, hoisting the increment can be much better than
76609467b48Spatrick /// duplicating the entire loop header. In the case of loops with early exits,
76709467b48Spatrick /// rotation will not work anyway, but simplifyLoopLatch will put the loop in
76809467b48Spatrick /// canonical form so downstream passes can handle it.
76909467b48Spatrick ///
77009467b48Spatrick /// I don't believe this invalidates SCEV.
simplifyLoopLatch(Loop * L)77109467b48Spatrick bool LoopRotate::simplifyLoopLatch(Loop *L) {
77209467b48Spatrick BasicBlock *Latch = L->getLoopLatch();
77309467b48Spatrick if (!Latch || Latch->hasAddressTaken())
77409467b48Spatrick return false;
77509467b48Spatrick
77609467b48Spatrick BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
77709467b48Spatrick if (!Jmp || !Jmp->isUnconditional())
77809467b48Spatrick return false;
77909467b48Spatrick
78009467b48Spatrick BasicBlock *LastExit = Latch->getSinglePredecessor();
78109467b48Spatrick if (!LastExit || !L->isLoopExiting(LastExit))
78209467b48Spatrick return false;
78309467b48Spatrick
78409467b48Spatrick BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
78509467b48Spatrick if (!BI)
78609467b48Spatrick return false;
78709467b48Spatrick
78809467b48Spatrick if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
78909467b48Spatrick return false;
79009467b48Spatrick
79109467b48Spatrick LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
79209467b48Spatrick << LastExit->getName() << "\n");
79309467b48Spatrick
79409467b48Spatrick DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
79509467b48Spatrick MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
79609467b48Spatrick /*PredecessorWithTwoSuccessors=*/true);
79709467b48Spatrick
798*d415bd75Srobert if (SE) {
799*d415bd75Srobert // Merging blocks may remove blocks reference in the block disposition cache. Clear the cache.
800*d415bd75Srobert SE->forgetBlockAndLoopDispositions();
801*d415bd75Srobert }
802*d415bd75Srobert
80309467b48Spatrick if (MSSAU && VerifyMemorySSA)
80409467b48Spatrick MSSAU->getMemorySSA()->verifyMemorySSA();
80509467b48Spatrick
80609467b48Spatrick return true;
80709467b48Spatrick }
80809467b48Spatrick
80909467b48Spatrick /// Rotate \c L, and return true if any modification was made.
processLoop(Loop * L)81009467b48Spatrick bool LoopRotate::processLoop(Loop *L) {
81109467b48Spatrick // Save the loop metadata.
81209467b48Spatrick MDNode *LoopMD = L->getLoopID();
81309467b48Spatrick
81409467b48Spatrick bool SimplifiedLatch = false;
81509467b48Spatrick
81609467b48Spatrick // Simplify the loop latch before attempting to rotate the header
81709467b48Spatrick // upward. Rotation may not be needed if the loop tail can be folded into the
81809467b48Spatrick // loop exit.
81909467b48Spatrick if (!RotationOnly)
82009467b48Spatrick SimplifiedLatch = simplifyLoopLatch(L);
82109467b48Spatrick
82209467b48Spatrick bool MadeChange = rotateLoop(L, SimplifiedLatch);
82309467b48Spatrick assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
82409467b48Spatrick "Loop latch should be exiting after loop-rotate.");
82509467b48Spatrick
82609467b48Spatrick // Restore the loop metadata.
82709467b48Spatrick // NB! We presume LoopRotation DOESN'T ADD its own metadata.
82809467b48Spatrick if ((MadeChange || SimplifiedLatch) && LoopMD)
82909467b48Spatrick L->setLoopID(LoopMD);
83009467b48Spatrick
83109467b48Spatrick return MadeChange || SimplifiedLatch;
83209467b48Spatrick }
83309467b48Spatrick
83409467b48Spatrick
83509467b48Spatrick /// The utility to convert a loop into a loop with bottom test.
LoopRotation(Loop * L,LoopInfo * LI,const TargetTransformInfo * TTI,AssumptionCache * AC,DominatorTree * DT,ScalarEvolution * SE,MemorySSAUpdater * MSSAU,const SimplifyQuery & SQ,bool RotationOnly=true,unsigned Threshold=unsigned (-1),bool IsUtilMode=true,bool PrepareForLTO)83609467b48Spatrick bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
83709467b48Spatrick AssumptionCache *AC, DominatorTree *DT,
83809467b48Spatrick ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
83909467b48Spatrick const SimplifyQuery &SQ, bool RotationOnly = true,
84009467b48Spatrick unsigned Threshold = unsigned(-1),
84173471bf0Spatrick bool IsUtilMode = true, bool PrepareForLTO) {
84209467b48Spatrick LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
84373471bf0Spatrick IsUtilMode, PrepareForLTO);
84409467b48Spatrick return LR.processLoop(L);
84509467b48Spatrick }
846