17330f729Sjoerg //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // This file implements some loop unrolling utilities. It does not define any
107330f729Sjoerg // actual pass or policy, but provides a single function to perform loop
117330f729Sjoerg // unrolling.
127330f729Sjoerg //
137330f729Sjoerg // The process of unrolling can produce extraneous basic blocks linked with
147330f729Sjoerg // unconditional branches. This will be corrected in the future.
157330f729Sjoerg //
167330f729Sjoerg //===----------------------------------------------------------------------===//
177330f729Sjoerg
18*82d56013Sjoerg #include "llvm/ADT/ArrayRef.h"
19*82d56013Sjoerg #include "llvm/ADT/DenseMap.h"
20*82d56013Sjoerg #include "llvm/ADT/Optional.h"
21*82d56013Sjoerg #include "llvm/ADT/STLExtras.h"
22*82d56013Sjoerg #include "llvm/ADT/SetVector.h"
23*82d56013Sjoerg #include "llvm/ADT/SmallVector.h"
247330f729Sjoerg #include "llvm/ADT/Statistic.h"
25*82d56013Sjoerg #include "llvm/ADT/StringRef.h"
26*82d56013Sjoerg #include "llvm/ADT/Twine.h"
27*82d56013Sjoerg #include "llvm/ADT/ilist_iterator.h"
28*82d56013Sjoerg #include "llvm/ADT/iterator_range.h"
297330f729Sjoerg #include "llvm/Analysis/AssumptionCache.h"
30*82d56013Sjoerg #include "llvm/Analysis/DomTreeUpdater.h"
317330f729Sjoerg #include "llvm/Analysis/InstructionSimplify.h"
32*82d56013Sjoerg #include "llvm/Analysis/LoopInfo.h"
337330f729Sjoerg #include "llvm/Analysis/LoopIterator.h"
347330f729Sjoerg #include "llvm/Analysis/OptimizationRemarkEmitter.h"
357330f729Sjoerg #include "llvm/Analysis/ScalarEvolution.h"
367330f729Sjoerg #include "llvm/IR/BasicBlock.h"
37*82d56013Sjoerg #include "llvm/IR/CFG.h"
38*82d56013Sjoerg #include "llvm/IR/Constants.h"
397330f729Sjoerg #include "llvm/IR/DebugInfoMetadata.h"
40*82d56013Sjoerg #include "llvm/IR/DebugLoc.h"
41*82d56013Sjoerg #include "llvm/IR/DiagnosticInfo.h"
427330f729Sjoerg #include "llvm/IR/Dominators.h"
43*82d56013Sjoerg #include "llvm/IR/Function.h"
44*82d56013Sjoerg #include "llvm/IR/Instruction.h"
45*82d56013Sjoerg #include "llvm/IR/Instructions.h"
467330f729Sjoerg #include "llvm/IR/IntrinsicInst.h"
47*82d56013Sjoerg #include "llvm/IR/Metadata.h"
48*82d56013Sjoerg #include "llvm/IR/Module.h"
49*82d56013Sjoerg #include "llvm/IR/Use.h"
50*82d56013Sjoerg #include "llvm/IR/User.h"
51*82d56013Sjoerg #include "llvm/IR/ValueHandle.h"
52*82d56013Sjoerg #include "llvm/IR/ValueMap.h"
53*82d56013Sjoerg #include "llvm/Support/Casting.h"
54*82d56013Sjoerg #include "llvm/Support/CommandLine.h"
557330f729Sjoerg #include "llvm/Support/Debug.h"
56*82d56013Sjoerg #include "llvm/Support/GenericDomTree.h"
57*82d56013Sjoerg #include "llvm/Support/MathExtras.h"
587330f729Sjoerg #include "llvm/Support/raw_ostream.h"
597330f729Sjoerg #include "llvm/Transforms/Utils/BasicBlockUtils.h"
607330f729Sjoerg #include "llvm/Transforms/Utils/Cloning.h"
61*82d56013Sjoerg #include "llvm/Transforms/Utils/Local.h"
62*82d56013Sjoerg #include "llvm/Transforms/Utils/LoopPeel.h"
637330f729Sjoerg #include "llvm/Transforms/Utils/LoopSimplify.h"
647330f729Sjoerg #include "llvm/Transforms/Utils/LoopUtils.h"
657330f729Sjoerg #include "llvm/Transforms/Utils/SimplifyIndVar.h"
667330f729Sjoerg #include "llvm/Transforms/Utils/UnrollLoop.h"
67*82d56013Sjoerg #include "llvm/Transforms/Utils/ValueMapper.h"
68*82d56013Sjoerg #include <algorithm>
69*82d56013Sjoerg #include <assert.h>
70*82d56013Sjoerg #include <type_traits>
71*82d56013Sjoerg #include <vector>
72*82d56013Sjoerg
73*82d56013Sjoerg namespace llvm {
74*82d56013Sjoerg class DataLayout;
75*82d56013Sjoerg class Value;
76*82d56013Sjoerg } // namespace llvm
77*82d56013Sjoerg
787330f729Sjoerg using namespace llvm;
797330f729Sjoerg
807330f729Sjoerg #define DEBUG_TYPE "loop-unroll"
817330f729Sjoerg
827330f729Sjoerg // TODO: Should these be here or in LoopUnroll?
837330f729Sjoerg STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
847330f729Sjoerg STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
85*82d56013Sjoerg STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
86*82d56013Sjoerg "latch (completely or otherwise)");
877330f729Sjoerg
887330f729Sjoerg static cl::opt<bool>
897330f729Sjoerg UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
907330f729Sjoerg cl::desc("Allow runtime unrolled loops to be unrolled "
917330f729Sjoerg "with epilog instead of prolog."));
927330f729Sjoerg
937330f729Sjoerg static cl::opt<bool>
947330f729Sjoerg UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
957330f729Sjoerg cl::desc("Verify domtree after unrolling"),
967330f729Sjoerg #ifdef EXPENSIVE_CHECKS
977330f729Sjoerg cl::init(true)
987330f729Sjoerg #else
997330f729Sjoerg cl::init(false)
1007330f729Sjoerg #endif
1017330f729Sjoerg );
1027330f729Sjoerg
1037330f729Sjoerg /// Check if unrolling created a situation where we need to insert phi nodes to
1047330f729Sjoerg /// preserve LCSSA form.
1057330f729Sjoerg /// \param Blocks is a vector of basic blocks representing unrolled loop.
1067330f729Sjoerg /// \param L is the outer loop.
1077330f729Sjoerg /// It's possible that some of the blocks are in L, and some are not. In this
1087330f729Sjoerg /// case, if there is a use is outside L, and definition is inside L, we need to
1097330f729Sjoerg /// insert a phi-node, otherwise LCSSA will be broken.
1107330f729Sjoerg /// The function is just a helper function for llvm::UnrollLoop that returns
1117330f729Sjoerg /// true if this situation occurs, indicating that LCSSA needs to be fixed.
needToInsertPhisForLCSSA(Loop * L,const std::vector<BasicBlock * > & Blocks,LoopInfo * LI)112*82d56013Sjoerg static bool needToInsertPhisForLCSSA(Loop *L,
113*82d56013Sjoerg const std::vector<BasicBlock *> &Blocks,
1147330f729Sjoerg LoopInfo *LI) {
1157330f729Sjoerg for (BasicBlock *BB : Blocks) {
1167330f729Sjoerg if (LI->getLoopFor(BB) == L)
1177330f729Sjoerg continue;
1187330f729Sjoerg for (Instruction &I : *BB) {
1197330f729Sjoerg for (Use &U : I.operands()) {
120*82d56013Sjoerg if (const auto *Def = dyn_cast<Instruction>(U)) {
1217330f729Sjoerg Loop *DefLoop = LI->getLoopFor(Def->getParent());
1227330f729Sjoerg if (!DefLoop)
1237330f729Sjoerg continue;
1247330f729Sjoerg if (DefLoop->contains(L))
1257330f729Sjoerg return true;
1267330f729Sjoerg }
1277330f729Sjoerg }
1287330f729Sjoerg }
1297330f729Sjoerg }
1307330f729Sjoerg return false;
1317330f729Sjoerg }
1327330f729Sjoerg
1337330f729Sjoerg /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
1347330f729Sjoerg /// and adds a mapping from the original loop to the new loop to NewLoops.
1357330f729Sjoerg /// Returns nullptr if no new loop was created and a pointer to the
1367330f729Sjoerg /// original loop OriginalBB was part of otherwise.
addClonedBlockToLoopInfo(BasicBlock * OriginalBB,BasicBlock * ClonedBB,LoopInfo * LI,NewLoopsMap & NewLoops)1377330f729Sjoerg const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
1387330f729Sjoerg BasicBlock *ClonedBB, LoopInfo *LI,
1397330f729Sjoerg NewLoopsMap &NewLoops) {
1407330f729Sjoerg // Figure out which loop New is in.
1417330f729Sjoerg const Loop *OldLoop = LI->getLoopFor(OriginalBB);
1427330f729Sjoerg assert(OldLoop && "Should (at least) be in the loop being unrolled!");
1437330f729Sjoerg
1447330f729Sjoerg Loop *&NewLoop = NewLoops[OldLoop];
1457330f729Sjoerg if (!NewLoop) {
1467330f729Sjoerg // Found a new sub-loop.
1477330f729Sjoerg assert(OriginalBB == OldLoop->getHeader() &&
1487330f729Sjoerg "Header should be first in RPO");
1497330f729Sjoerg
1507330f729Sjoerg NewLoop = LI->AllocateLoop();
1517330f729Sjoerg Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
1527330f729Sjoerg
1537330f729Sjoerg if (NewLoopParent)
1547330f729Sjoerg NewLoopParent->addChildLoop(NewLoop);
1557330f729Sjoerg else
1567330f729Sjoerg LI->addTopLevelLoop(NewLoop);
1577330f729Sjoerg
1587330f729Sjoerg NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
1597330f729Sjoerg return OldLoop;
1607330f729Sjoerg } else {
1617330f729Sjoerg NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
1627330f729Sjoerg return nullptr;
1637330f729Sjoerg }
1647330f729Sjoerg }
1657330f729Sjoerg
1667330f729Sjoerg /// The function chooses which type of unroll (epilog or prolog) is more
1677330f729Sjoerg /// profitabale.
1687330f729Sjoerg /// Epilog unroll is more profitable when there is PHI that starts from
1697330f729Sjoerg /// constant. In this case epilog will leave PHI start from constant,
1707330f729Sjoerg /// but prolog will convert it to non-constant.
1717330f729Sjoerg ///
1727330f729Sjoerg /// loop:
1737330f729Sjoerg /// PN = PHI [I, Latch], [CI, PreHeader]
1747330f729Sjoerg /// I = foo(PN)
1757330f729Sjoerg /// ...
1767330f729Sjoerg ///
1777330f729Sjoerg /// Epilog unroll case.
1787330f729Sjoerg /// loop:
1797330f729Sjoerg /// PN = PHI [I2, Latch], [CI, PreHeader]
1807330f729Sjoerg /// I1 = foo(PN)
1817330f729Sjoerg /// I2 = foo(I1)
1827330f729Sjoerg /// ...
1837330f729Sjoerg /// Prolog unroll case.
1847330f729Sjoerg /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
1857330f729Sjoerg /// loop:
1867330f729Sjoerg /// PN = PHI [I2, Latch], [NewPN, PreHeader]
1877330f729Sjoerg /// I1 = foo(PN)
1887330f729Sjoerg /// I2 = foo(I1)
1897330f729Sjoerg /// ...
1907330f729Sjoerg ///
isEpilogProfitable(Loop * L)1917330f729Sjoerg static bool isEpilogProfitable(Loop *L) {
1927330f729Sjoerg BasicBlock *PreHeader = L->getLoopPreheader();
1937330f729Sjoerg BasicBlock *Header = L->getHeader();
1947330f729Sjoerg assert(PreHeader && Header);
1957330f729Sjoerg for (const PHINode &PN : Header->phis()) {
1967330f729Sjoerg if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
1977330f729Sjoerg return true;
1987330f729Sjoerg }
1997330f729Sjoerg return false;
2007330f729Sjoerg }
2017330f729Sjoerg
2027330f729Sjoerg /// Perform some cleanup and simplifications on loops after unrolling. It is
2037330f729Sjoerg /// useful to simplify the IV's in the new loop, as well as do a quick
2047330f729Sjoerg /// simplify/dce pass of the instructions.
simplifyLoopAfterUnroll(Loop * L,bool SimplifyIVs,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI)2057330f729Sjoerg void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
2067330f729Sjoerg ScalarEvolution *SE, DominatorTree *DT,
207*82d56013Sjoerg AssumptionCache *AC,
208*82d56013Sjoerg const TargetTransformInfo *TTI) {
2097330f729Sjoerg // Simplify any new induction variables in the partially unrolled loop.
2107330f729Sjoerg if (SE && SimplifyIVs) {
2117330f729Sjoerg SmallVector<WeakTrackingVH, 16> DeadInsts;
212*82d56013Sjoerg simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
2137330f729Sjoerg
2147330f729Sjoerg // Aggressively clean up dead instructions that simplifyLoopIVs already
2157330f729Sjoerg // identified. Any remaining should be cleaned up below.
216*82d56013Sjoerg while (!DeadInsts.empty()) {
217*82d56013Sjoerg Value *V = DeadInsts.pop_back_val();
218*82d56013Sjoerg if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2197330f729Sjoerg RecursivelyDeleteTriviallyDeadInstructions(Inst);
2207330f729Sjoerg }
221*82d56013Sjoerg }
2227330f729Sjoerg
223*82d56013Sjoerg // At this point, the code is well formed. Perform constprop, instsimplify,
224*82d56013Sjoerg // and dce.
2257330f729Sjoerg const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
226*82d56013Sjoerg SmallVector<WeakTrackingVH, 16> DeadInsts;
2277330f729Sjoerg for (BasicBlock *BB : L->getBlocks()) {
2287330f729Sjoerg for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
2297330f729Sjoerg Instruction *Inst = &*I++;
2307330f729Sjoerg if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
2317330f729Sjoerg if (LI->replacementPreservesLCSSAForm(Inst, V))
2327330f729Sjoerg Inst->replaceAllUsesWith(V);
2337330f729Sjoerg if (isInstructionTriviallyDead(Inst))
234*82d56013Sjoerg DeadInsts.emplace_back(Inst);
2357330f729Sjoerg }
236*82d56013Sjoerg // We can't do recursive deletion until we're done iterating, as we might
237*82d56013Sjoerg // have a phi which (potentially indirectly) uses instructions later in
238*82d56013Sjoerg // the block we're iterating through.
239*82d56013Sjoerg RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
2407330f729Sjoerg }
2417330f729Sjoerg }
2427330f729Sjoerg
2437330f729Sjoerg /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
2447330f729Sjoerg /// can only fail when the loop's latch block is not terminated by a conditional
2457330f729Sjoerg /// branch instruction. However, if the trip count (and multiple) are not known,
2467330f729Sjoerg /// loop unrolling will mostly produce more code that is no faster.
2477330f729Sjoerg ///
2487330f729Sjoerg /// TripCount is the upper bound of the iteration on which control exits
2497330f729Sjoerg /// LatchBlock. Control may exit the loop prior to TripCount iterations either
2507330f729Sjoerg /// via an early branch in other loop block or via LatchBlock terminator. This
2517330f729Sjoerg /// is relaxed from the general definition of trip count which is the number of
2527330f729Sjoerg /// times the loop header executes. Note that UnrollLoop assumes that the loop
2537330f729Sjoerg /// counter test is in LatchBlock in order to remove unnecesssary instances of
2547330f729Sjoerg /// the test. If control can exit the loop from the LatchBlock's terminator
2557330f729Sjoerg /// prior to TripCount iterations, flag PreserveCondBr needs to be set.
2567330f729Sjoerg ///
2577330f729Sjoerg /// PreserveCondBr indicates whether the conditional branch of the LatchBlock
2587330f729Sjoerg /// needs to be preserved. It is needed when we use trip count upper bound to
2597330f729Sjoerg /// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
2607330f729Sjoerg /// conditional branch needs to be preserved.
2617330f729Sjoerg ///
2627330f729Sjoerg /// Similarly, TripMultiple divides the number of times that the LatchBlock may
2637330f729Sjoerg /// execute without exiting the loop.
2647330f729Sjoerg ///
2657330f729Sjoerg /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
2667330f729Sjoerg /// have a runtime (i.e. not compile time constant) trip count. Unrolling these
2677330f729Sjoerg /// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
2687330f729Sjoerg /// iterations before branching into the unrolled loop. UnrollLoop will not
2697330f729Sjoerg /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
2707330f729Sjoerg /// AllowExpensiveTripCount is false.
2717330f729Sjoerg ///
2727330f729Sjoerg /// If we want to perform PGO-based loop peeling, PeelCount is set to the
2737330f729Sjoerg /// number of iterations we want to peel off.
2747330f729Sjoerg ///
2757330f729Sjoerg /// The LoopInfo Analysis that is passed will be kept consistent.
2767330f729Sjoerg ///
2777330f729Sjoerg /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
2787330f729Sjoerg /// DominatorTree if they are non-null.
2797330f729Sjoerg ///
2807330f729Sjoerg /// If RemainderLoop is non-null, it will receive the remainder loop (if
2817330f729Sjoerg /// required and not fully unrolled).
UnrollLoop(Loop * L,UnrollLoopOptions ULO,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE,bool PreserveLCSSA,Loop ** RemainderLoop)2827330f729Sjoerg LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
2837330f729Sjoerg ScalarEvolution *SE, DominatorTree *DT,
2847330f729Sjoerg AssumptionCache *AC,
285*82d56013Sjoerg const TargetTransformInfo *TTI,
2867330f729Sjoerg OptimizationRemarkEmitter *ORE,
2877330f729Sjoerg bool PreserveLCSSA, Loop **RemainderLoop) {
2887330f729Sjoerg
289*82d56013Sjoerg if (!L->getLoopPreheader()) {
2907330f729Sjoerg LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
2917330f729Sjoerg return LoopUnrollResult::Unmodified;
2927330f729Sjoerg }
2937330f729Sjoerg
294*82d56013Sjoerg if (!L->getLoopLatch()) {
2957330f729Sjoerg LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
2967330f729Sjoerg return LoopUnrollResult::Unmodified;
2977330f729Sjoerg }
2987330f729Sjoerg
2997330f729Sjoerg // Loops with indirectbr cannot be cloned.
3007330f729Sjoerg if (!L->isSafeToClone()) {
3017330f729Sjoerg LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
3027330f729Sjoerg return LoopUnrollResult::Unmodified;
3037330f729Sjoerg }
3047330f729Sjoerg
305*82d56013Sjoerg if (L->getHeader()->hasAddressTaken()) {
3067330f729Sjoerg // The loop-rotate pass can be helpful to avoid this in many cases.
3077330f729Sjoerg LLVM_DEBUG(
3087330f729Sjoerg dbgs() << " Won't unroll loop: address of header block is taken.\n");
3097330f729Sjoerg return LoopUnrollResult::Unmodified;
3107330f729Sjoerg }
3117330f729Sjoerg
3127330f729Sjoerg if (ULO.TripCount != 0)
3137330f729Sjoerg LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
3147330f729Sjoerg if (ULO.TripMultiple != 1)
3157330f729Sjoerg LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
3167330f729Sjoerg
3177330f729Sjoerg // Effectively "DCE" unrolled iterations that are beyond the tripcount
3187330f729Sjoerg // and will never be executed.
3197330f729Sjoerg if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
3207330f729Sjoerg ULO.Count = ULO.TripCount;
3217330f729Sjoerg
3227330f729Sjoerg // Don't enter the unroll code if there is nothing to do.
3237330f729Sjoerg if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
3247330f729Sjoerg LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
3257330f729Sjoerg return LoopUnrollResult::Unmodified;
3267330f729Sjoerg }
3277330f729Sjoerg
3287330f729Sjoerg assert(ULO.Count > 0);
3297330f729Sjoerg assert(ULO.TripMultiple > 0);
3307330f729Sjoerg assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
3317330f729Sjoerg
3327330f729Sjoerg // Are we eliminating the loop control altogether?
3337330f729Sjoerg bool CompletelyUnroll = ULO.Count == ULO.TripCount;
3347330f729Sjoerg
3357330f729Sjoerg // We assume a run-time trip count if the compiler cannot
3367330f729Sjoerg // figure out the loop trip count and the unroll-runtime
3377330f729Sjoerg // flag is specified.
3387330f729Sjoerg bool RuntimeTripCount =
3397330f729Sjoerg (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
3407330f729Sjoerg
3417330f729Sjoerg assert((!RuntimeTripCount || !ULO.PeelCount) &&
3427330f729Sjoerg "Did not expect runtime trip-count unrolling "
3437330f729Sjoerg "and peeling for the same loop");
3447330f729Sjoerg
3457330f729Sjoerg bool Peeled = false;
3467330f729Sjoerg if (ULO.PeelCount) {
3477330f729Sjoerg Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
3487330f729Sjoerg
3497330f729Sjoerg // Successful peeling may result in a change in the loop preheader/trip
3507330f729Sjoerg // counts. If we later unroll the loop, we want these to be updated.
3517330f729Sjoerg if (Peeled) {
3527330f729Sjoerg // According to our guards and profitability checks the only
3537330f729Sjoerg // meaningful exit should be latch block. Other exits go to deopt,
3547330f729Sjoerg // so we do not worry about them.
3557330f729Sjoerg BasicBlock *ExitingBlock = L->getLoopLatch();
3567330f729Sjoerg assert(ExitingBlock && "Loop without exiting block?");
3577330f729Sjoerg assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
3587330f729Sjoerg ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
3597330f729Sjoerg ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
3607330f729Sjoerg }
3617330f729Sjoerg }
3627330f729Sjoerg
363*82d56013Sjoerg // All these values should be taken only after peeling because they might have
364*82d56013Sjoerg // changed.
365*82d56013Sjoerg BasicBlock *Preheader = L->getLoopPreheader();
366*82d56013Sjoerg BasicBlock *Header = L->getHeader();
367*82d56013Sjoerg BasicBlock *LatchBlock = L->getLoopLatch();
368*82d56013Sjoerg SmallVector<BasicBlock *, 4> ExitBlocks;
369*82d56013Sjoerg L->getExitBlocks(ExitBlocks);
370*82d56013Sjoerg std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
371*82d56013Sjoerg
372*82d56013Sjoerg // Go through all exits of L and see if there are any phi-nodes there. We just
373*82d56013Sjoerg // conservatively assume that they're inserted to preserve LCSSA form, which
374*82d56013Sjoerg // means that complete unrolling might break this form. We need to either fix
375*82d56013Sjoerg // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
376*82d56013Sjoerg // now we just recompute LCSSA for the outer loop, but it should be possible
377*82d56013Sjoerg // to fix it in-place.
378*82d56013Sjoerg bool NeedToFixLCSSA =
379*82d56013Sjoerg PreserveLCSSA && CompletelyUnroll &&
380*82d56013Sjoerg any_of(ExitBlocks,
381*82d56013Sjoerg [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
382*82d56013Sjoerg
383*82d56013Sjoerg // The current loop unroll pass can unroll loops that have
384*82d56013Sjoerg // (1) single latch; and
385*82d56013Sjoerg // (2a) latch is unconditional; or
386*82d56013Sjoerg // (2b) latch is conditional and is an exiting block
387*82d56013Sjoerg // FIXME: The implementation can be extended to work with more complicated
388*82d56013Sjoerg // cases, e.g. loops with multiple latches.
389*82d56013Sjoerg BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
390*82d56013Sjoerg
391*82d56013Sjoerg // A conditional branch which exits the loop, which can be optimized to an
392*82d56013Sjoerg // unconditional branch in the unrolled loop in some cases.
393*82d56013Sjoerg BranchInst *ExitingBI = nullptr;
394*82d56013Sjoerg bool LatchIsExiting = L->isLoopExiting(LatchBlock);
395*82d56013Sjoerg if (LatchIsExiting)
396*82d56013Sjoerg ExitingBI = LatchBI;
397*82d56013Sjoerg else if (BasicBlock *ExitingBlock = L->getExitingBlock())
398*82d56013Sjoerg ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
399*82d56013Sjoerg if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
400*82d56013Sjoerg // If the peeling guard is changed this assert may be relaxed or even
401*82d56013Sjoerg // deleted.
402*82d56013Sjoerg assert(!Peeled && "Peeling guard changed!");
403*82d56013Sjoerg LLVM_DEBUG(
404*82d56013Sjoerg dbgs() << "Can't unroll; a conditional latch must exit the loop");
405*82d56013Sjoerg return LoopUnrollResult::Unmodified;
406*82d56013Sjoerg }
407*82d56013Sjoerg LLVM_DEBUG({
408*82d56013Sjoerg if (ExitingBI)
409*82d56013Sjoerg dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
410*82d56013Sjoerg << "\n";
411*82d56013Sjoerg else
412*82d56013Sjoerg dbgs() << " No single exiting block\n";
413*82d56013Sjoerg });
414*82d56013Sjoerg
4157330f729Sjoerg // Loops containing convergent instructions must have a count that divides
4167330f729Sjoerg // their TripMultiple.
4177330f729Sjoerg LLVM_DEBUG(
4187330f729Sjoerg {
4197330f729Sjoerg bool HasConvergent = false;
4207330f729Sjoerg for (auto &BB : L->blocks())
4217330f729Sjoerg for (auto &I : *BB)
422*82d56013Sjoerg if (auto *CB = dyn_cast<CallBase>(&I))
423*82d56013Sjoerg HasConvergent |= CB->isConvergent();
4247330f729Sjoerg assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
4257330f729Sjoerg "Unroll count must divide trip multiple if loop contains a "
4267330f729Sjoerg "convergent operation.");
4277330f729Sjoerg });
4287330f729Sjoerg
4297330f729Sjoerg bool EpilogProfitability =
4307330f729Sjoerg UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
4317330f729Sjoerg : isEpilogProfitable(L);
4327330f729Sjoerg
4337330f729Sjoerg if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
4347330f729Sjoerg !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
4357330f729Sjoerg EpilogProfitability, ULO.UnrollRemainder,
436*82d56013Sjoerg ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
4377330f729Sjoerg PreserveLCSSA, RemainderLoop)) {
4387330f729Sjoerg if (ULO.Force)
4397330f729Sjoerg RuntimeTripCount = false;
4407330f729Sjoerg else {
4417330f729Sjoerg LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
4427330f729Sjoerg "generated when assuming runtime trip count\n");
4437330f729Sjoerg return LoopUnrollResult::Unmodified;
4447330f729Sjoerg }
4457330f729Sjoerg }
4467330f729Sjoerg
4477330f729Sjoerg // If we know the trip count, we know the multiple...
4487330f729Sjoerg unsigned BreakoutTrip = 0;
4497330f729Sjoerg if (ULO.TripCount != 0) {
4507330f729Sjoerg BreakoutTrip = ULO.TripCount % ULO.Count;
4517330f729Sjoerg ULO.TripMultiple = 0;
4527330f729Sjoerg } else {
4537330f729Sjoerg // Figure out what multiple to use.
4547330f729Sjoerg BreakoutTrip = ULO.TripMultiple =
4557330f729Sjoerg (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
4567330f729Sjoerg }
4577330f729Sjoerg
4587330f729Sjoerg using namespace ore;
4597330f729Sjoerg // Report the unrolling decision.
4607330f729Sjoerg if (CompletelyUnroll) {
4617330f729Sjoerg LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
4627330f729Sjoerg << " with trip count " << ULO.TripCount << "!\n");
4637330f729Sjoerg if (ORE)
4647330f729Sjoerg ORE->emit([&]() {
4657330f729Sjoerg return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
4667330f729Sjoerg L->getHeader())
4677330f729Sjoerg << "completely unrolled loop with "
4687330f729Sjoerg << NV("UnrollCount", ULO.TripCount) << " iterations";
4697330f729Sjoerg });
4707330f729Sjoerg } else if (ULO.PeelCount) {
4717330f729Sjoerg LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
4727330f729Sjoerg << " with iteration count " << ULO.PeelCount << "!\n");
4737330f729Sjoerg if (ORE)
4747330f729Sjoerg ORE->emit([&]() {
4757330f729Sjoerg return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
4767330f729Sjoerg L->getHeader())
4777330f729Sjoerg << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
4787330f729Sjoerg << " iterations";
4797330f729Sjoerg });
4807330f729Sjoerg } else {
4817330f729Sjoerg auto DiagBuilder = [&]() {
4827330f729Sjoerg OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
4837330f729Sjoerg L->getHeader());
4847330f729Sjoerg return Diag << "unrolled loop by a factor of "
4857330f729Sjoerg << NV("UnrollCount", ULO.Count);
4867330f729Sjoerg };
4877330f729Sjoerg
4887330f729Sjoerg LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
4897330f729Sjoerg << ULO.Count);
4907330f729Sjoerg if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
4917330f729Sjoerg LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
4927330f729Sjoerg if (ORE)
4937330f729Sjoerg ORE->emit([&]() {
4947330f729Sjoerg return DiagBuilder() << " with a breakout at trip "
4957330f729Sjoerg << NV("BreakoutTrip", BreakoutTrip);
4967330f729Sjoerg });
4977330f729Sjoerg } else if (ULO.TripMultiple != 1) {
4987330f729Sjoerg LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
4997330f729Sjoerg if (ORE)
5007330f729Sjoerg ORE->emit([&]() {
5017330f729Sjoerg return DiagBuilder()
5027330f729Sjoerg << " with " << NV("TripMultiple", ULO.TripMultiple)
5037330f729Sjoerg << " trips per branch";
5047330f729Sjoerg });
5057330f729Sjoerg } else if (RuntimeTripCount) {
5067330f729Sjoerg LLVM_DEBUG(dbgs() << " with run-time trip count");
5077330f729Sjoerg if (ORE)
5087330f729Sjoerg ORE->emit(
5097330f729Sjoerg [&]() { return DiagBuilder() << " with run-time trip count"; });
5107330f729Sjoerg }
5117330f729Sjoerg LLVM_DEBUG(dbgs() << "!\n");
5127330f729Sjoerg }
5137330f729Sjoerg
5147330f729Sjoerg // We are going to make changes to this loop. SCEV may be keeping cached info
5157330f729Sjoerg // about it, in particular about backedge taken count. The changes we make
5167330f729Sjoerg // are guaranteed to invalidate this information for our loop. It is tempting
5177330f729Sjoerg // to only invalidate the loop being unrolled, but it is incorrect as long as
5187330f729Sjoerg // all exiting branches from all inner loops have impact on the outer loops,
5197330f729Sjoerg // and if something changes inside them then any of outer loops may also
5207330f729Sjoerg // change. When we forget outermost loop, we also forget all contained loops
5217330f729Sjoerg // and this is what we need here.
5227330f729Sjoerg if (SE) {
5237330f729Sjoerg if (ULO.ForgetAllSCEV)
5247330f729Sjoerg SE->forgetAllLoops();
5257330f729Sjoerg else
5267330f729Sjoerg SE->forgetTopmostLoop(L);
5277330f729Sjoerg }
5287330f729Sjoerg
529*82d56013Sjoerg if (!LatchIsExiting)
530*82d56013Sjoerg ++NumUnrolledNotLatch;
531*82d56013Sjoerg Optional<bool> ContinueOnTrue = None;
5327330f729Sjoerg BasicBlock *LoopExit = nullptr;
533*82d56013Sjoerg if (ExitingBI) {
534*82d56013Sjoerg ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0));
535*82d56013Sjoerg LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue);
5367330f729Sjoerg }
5377330f729Sjoerg
5387330f729Sjoerg // For the first iteration of the loop, we should use the precloned values for
5397330f729Sjoerg // PHI nodes. Insert associations now.
5407330f729Sjoerg ValueToValueMapTy LastValueMap;
5417330f729Sjoerg std::vector<PHINode*> OrigPHINode;
5427330f729Sjoerg for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
5437330f729Sjoerg OrigPHINode.push_back(cast<PHINode>(I));
5447330f729Sjoerg }
5457330f729Sjoerg
5467330f729Sjoerg std::vector<BasicBlock *> Headers;
547*82d56013Sjoerg std::vector<BasicBlock *> ExitingBlocks;
548*82d56013Sjoerg std::vector<BasicBlock *> ExitingSucc;
5497330f729Sjoerg std::vector<BasicBlock *> Latches;
5507330f729Sjoerg Headers.push_back(Header);
5517330f729Sjoerg Latches.push_back(LatchBlock);
552*82d56013Sjoerg if (ExitingBI) {
553*82d56013Sjoerg ExitingBlocks.push_back(ExitingBI->getParent());
554*82d56013Sjoerg ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue)));
5557330f729Sjoerg }
5567330f729Sjoerg
5577330f729Sjoerg // The current on-the-fly SSA update requires blocks to be processed in
5587330f729Sjoerg // reverse postorder so that LastValueMap contains the correct value at each
5597330f729Sjoerg // exit.
5607330f729Sjoerg LoopBlocksDFS DFS(L);
5617330f729Sjoerg DFS.perform(LI);
5627330f729Sjoerg
5637330f729Sjoerg // Stash the DFS iterators before adding blocks to the loop.
5647330f729Sjoerg LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
5657330f729Sjoerg LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
5667330f729Sjoerg
5677330f729Sjoerg std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
5687330f729Sjoerg
5697330f729Sjoerg // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
5707330f729Sjoerg // might break loop-simplified form for these loops (as they, e.g., would
5717330f729Sjoerg // share the same exit blocks). We'll keep track of loops for which we can
5727330f729Sjoerg // break this so that later we can re-simplify them.
5737330f729Sjoerg SmallSetVector<Loop *, 4> LoopsToSimplify;
5747330f729Sjoerg for (Loop *SubLoop : *L)
5757330f729Sjoerg LoopsToSimplify.insert(SubLoop);
5767330f729Sjoerg
577*82d56013Sjoerg // When a FSDiscriminator is enabled, we don't need to add the multiply
578*82d56013Sjoerg // factors to the discriminators.
579*82d56013Sjoerg if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
5807330f729Sjoerg for (BasicBlock *BB : L->getBlocks())
5817330f729Sjoerg for (Instruction &I : *BB)
5827330f729Sjoerg if (!isa<DbgInfoIntrinsic>(&I))
5837330f729Sjoerg if (const DILocation *DIL = I.getDebugLoc()) {
5847330f729Sjoerg auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
5857330f729Sjoerg if (NewDIL)
5867330f729Sjoerg I.setDebugLoc(NewDIL.getValue());
5877330f729Sjoerg else
5887330f729Sjoerg LLVM_DEBUG(dbgs()
5897330f729Sjoerg << "Failed to create new discriminator: "
5907330f729Sjoerg << DIL->getFilename() << " Line: " << DIL->getLine());
5917330f729Sjoerg }
5927330f729Sjoerg
593*82d56013Sjoerg // Identify what noalias metadata is inside the loop: if it is inside the
594*82d56013Sjoerg // loop, the associated metadata must be cloned for each iteration.
595*82d56013Sjoerg SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
596*82d56013Sjoerg identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
597*82d56013Sjoerg
5987330f729Sjoerg for (unsigned It = 1; It != ULO.Count; ++It) {
599*82d56013Sjoerg SmallVector<BasicBlock *, 8> NewBlocks;
6007330f729Sjoerg SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
6017330f729Sjoerg NewLoops[L] = L;
6027330f729Sjoerg
6037330f729Sjoerg for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
6047330f729Sjoerg ValueToValueMapTy VMap;
6057330f729Sjoerg BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
6067330f729Sjoerg Header->getParent()->getBasicBlockList().push_back(New);
6077330f729Sjoerg
6087330f729Sjoerg assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
6097330f729Sjoerg "Header should not be in a sub-loop");
6107330f729Sjoerg // Tell LI about New.
6117330f729Sjoerg const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
6127330f729Sjoerg if (OldLoop)
6137330f729Sjoerg LoopsToSimplify.insert(NewLoops[OldLoop]);
6147330f729Sjoerg
6157330f729Sjoerg if (*BB == Header)
6167330f729Sjoerg // Loop over all of the PHI nodes in the block, changing them to use
6177330f729Sjoerg // the incoming values from the previous block.
6187330f729Sjoerg for (PHINode *OrigPHI : OrigPHINode) {
6197330f729Sjoerg PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
6207330f729Sjoerg Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
6217330f729Sjoerg if (Instruction *InValI = dyn_cast<Instruction>(InVal))
6227330f729Sjoerg if (It > 1 && L->contains(InValI))
6237330f729Sjoerg InVal = LastValueMap[InValI];
6247330f729Sjoerg VMap[OrigPHI] = InVal;
6257330f729Sjoerg New->getInstList().erase(NewPHI);
6267330f729Sjoerg }
6277330f729Sjoerg
6287330f729Sjoerg // Update our running map of newest clones
6297330f729Sjoerg LastValueMap[*BB] = New;
6307330f729Sjoerg for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
6317330f729Sjoerg VI != VE; ++VI)
6327330f729Sjoerg LastValueMap[VI->first] = VI->second;
6337330f729Sjoerg
6347330f729Sjoerg // Add phi entries for newly created values to all exit blocks.
6357330f729Sjoerg for (BasicBlock *Succ : successors(*BB)) {
6367330f729Sjoerg if (L->contains(Succ))
6377330f729Sjoerg continue;
6387330f729Sjoerg for (PHINode &PHI : Succ->phis()) {
6397330f729Sjoerg Value *Incoming = PHI.getIncomingValueForBlock(*BB);
6407330f729Sjoerg ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
6417330f729Sjoerg if (It != LastValueMap.end())
6427330f729Sjoerg Incoming = It->second;
6437330f729Sjoerg PHI.addIncoming(Incoming, New);
6447330f729Sjoerg }
6457330f729Sjoerg }
6467330f729Sjoerg // Keep track of new headers and latches as we create them, so that
6477330f729Sjoerg // we can insert the proper branches later.
6487330f729Sjoerg if (*BB == Header)
6497330f729Sjoerg Headers.push_back(New);
6507330f729Sjoerg if (*BB == LatchBlock)
6517330f729Sjoerg Latches.push_back(New);
6527330f729Sjoerg
653*82d56013Sjoerg // Keep track of the exiting block and its successor block contained in
654*82d56013Sjoerg // the loop for the current iteration.
655*82d56013Sjoerg if (ExitingBI) {
656*82d56013Sjoerg if (*BB == ExitingBlocks[0])
657*82d56013Sjoerg ExitingBlocks.push_back(New);
658*82d56013Sjoerg if (*BB == ExitingSucc[0])
659*82d56013Sjoerg ExitingSucc.push_back(New);
6607330f729Sjoerg }
6617330f729Sjoerg
6627330f729Sjoerg NewBlocks.push_back(New);
6637330f729Sjoerg UnrolledLoopBlocks.push_back(New);
6647330f729Sjoerg
6657330f729Sjoerg // Update DomTree: since we just copy the loop body, and each copy has a
6667330f729Sjoerg // dedicated entry block (copy of the header block), this header's copy
6677330f729Sjoerg // dominates all copied blocks. That means, dominance relations in the
6687330f729Sjoerg // copied body are the same as in the original body.
6697330f729Sjoerg if (DT) {
6707330f729Sjoerg if (*BB == Header)
6717330f729Sjoerg DT->addNewBlock(New, Latches[It - 1]);
6727330f729Sjoerg else {
6737330f729Sjoerg auto BBDomNode = DT->getNode(*BB);
6747330f729Sjoerg auto BBIDom = BBDomNode->getIDom();
6757330f729Sjoerg BasicBlock *OriginalBBIDom = BBIDom->getBlock();
6767330f729Sjoerg DT->addNewBlock(
6777330f729Sjoerg New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
6787330f729Sjoerg }
6797330f729Sjoerg }
6807330f729Sjoerg }
6817330f729Sjoerg
6827330f729Sjoerg // Remap all instructions in the most recent iteration
683*82d56013Sjoerg remapInstructionsInBlocks(NewBlocks, LastValueMap);
684*82d56013Sjoerg for (BasicBlock *NewBlock : NewBlocks)
685*82d56013Sjoerg for (Instruction &I : *NewBlock)
686*82d56013Sjoerg if (auto *II = dyn_cast<AssumeInst>(&I))
6877330f729Sjoerg AC->registerAssumption(II);
688*82d56013Sjoerg
689*82d56013Sjoerg {
690*82d56013Sjoerg // Identify what other metadata depends on the cloned version. After
691*82d56013Sjoerg // cloning, replace the metadata with the corrected version for both
692*82d56013Sjoerg // memory instructions and noalias intrinsics.
693*82d56013Sjoerg std::string ext = (Twine("It") + Twine(It)).str();
694*82d56013Sjoerg cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
695*82d56013Sjoerg Header->getContext(), ext);
6967330f729Sjoerg }
6977330f729Sjoerg }
6987330f729Sjoerg
6997330f729Sjoerg // Loop over the PHI nodes in the original block, setting incoming values.
7007330f729Sjoerg for (PHINode *PN : OrigPHINode) {
7017330f729Sjoerg if (CompletelyUnroll) {
7027330f729Sjoerg PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
7037330f729Sjoerg Header->getInstList().erase(PN);
7047330f729Sjoerg } else if (ULO.Count > 1) {
7057330f729Sjoerg Value *InVal = PN->removeIncomingValue(LatchBlock, false);
7067330f729Sjoerg // If this value was defined in the loop, take the value defined by the
7077330f729Sjoerg // last iteration of the loop.
7087330f729Sjoerg if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
7097330f729Sjoerg if (L->contains(InValI))
7107330f729Sjoerg InVal = LastValueMap[InVal];
7117330f729Sjoerg }
7127330f729Sjoerg assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
7137330f729Sjoerg PN->addIncoming(InVal, Latches.back());
7147330f729Sjoerg }
7157330f729Sjoerg }
7167330f729Sjoerg
717*82d56013Sjoerg auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop,
718*82d56013Sjoerg bool NeedConditional, Optional<bool> ContinueOnTrue,
719*82d56013Sjoerg bool IsDestLoopExit) {
7207330f729Sjoerg auto *Term = cast<BranchInst>(Src->getTerminator());
7217330f729Sjoerg if (NeedConditional) {
7227330f729Sjoerg // Update the conditional branch's successor for the following
7237330f729Sjoerg // iteration.
724*82d56013Sjoerg assert(ContinueOnTrue.hasValue() &&
725*82d56013Sjoerg "Expecting valid ContinueOnTrue when NeedConditional is true");
726*82d56013Sjoerg Term->setSuccessor(!(*ContinueOnTrue), Dest);
7277330f729Sjoerg } else {
7287330f729Sjoerg // Remove phi operands at this loop exit
729*82d56013Sjoerg if (!IsDestLoopExit) {
7307330f729Sjoerg BasicBlock *BB = Src;
7317330f729Sjoerg for (BasicBlock *Succ : successors(BB)) {
7327330f729Sjoerg // Preserve the incoming value from BB if we are jumping to the block
7337330f729Sjoerg // in the current loop.
7347330f729Sjoerg if (Succ == BlockInLoop)
7357330f729Sjoerg continue;
7367330f729Sjoerg for (PHINode &Phi : Succ->phis())
7377330f729Sjoerg Phi.removeIncomingValue(BB, false);
7387330f729Sjoerg }
7397330f729Sjoerg }
7407330f729Sjoerg // Replace the conditional branch with an unconditional one.
7417330f729Sjoerg BranchInst::Create(Dest, Term);
7427330f729Sjoerg Term->eraseFromParent();
7437330f729Sjoerg }
7447330f729Sjoerg };
7457330f729Sjoerg
746*82d56013Sjoerg // Connect latches of the unrolled iterations to the headers of the next
747*82d56013Sjoerg // iteration. If the latch is also the exiting block, the conditional branch
748*82d56013Sjoerg // may have to be preserved.
7497330f729Sjoerg for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
7507330f729Sjoerg // The branch destination.
7517330f729Sjoerg unsigned j = (i + 1) % e;
7527330f729Sjoerg BasicBlock *Dest = Headers[j];
753*82d56013Sjoerg bool NeedConditional = LatchIsExiting;
7547330f729Sjoerg
755*82d56013Sjoerg if (LatchIsExiting) {
756*82d56013Sjoerg if (RuntimeTripCount && j != 0)
7577330f729Sjoerg NeedConditional = false;
7587330f729Sjoerg
7597330f729Sjoerg // For a complete unroll, make the last iteration end with a branch
7607330f729Sjoerg // to the exit block.
7617330f729Sjoerg if (CompletelyUnroll) {
7627330f729Sjoerg if (j == 0)
7637330f729Sjoerg Dest = LoopExit;
764*82d56013Sjoerg // If using trip count upper bound to completely unroll, we need to
765*82d56013Sjoerg // keep the conditional branch except the last one because the loop
766*82d56013Sjoerg // may exit after any iteration.
7677330f729Sjoerg assert(NeedConditional &&
7687330f729Sjoerg "NeedCondition cannot be modified by both complete "
7697330f729Sjoerg "unrolling and runtime unrolling");
7707330f729Sjoerg NeedConditional =
7717330f729Sjoerg (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
7727330f729Sjoerg } else if (j != BreakoutTrip &&
7737330f729Sjoerg (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
7747330f729Sjoerg // If we know the trip count or a multiple of it, we can safely use an
7757330f729Sjoerg // unconditional branch for some iterations.
7767330f729Sjoerg NeedConditional = false;
7777330f729Sjoerg }
7787330f729Sjoerg }
779*82d56013Sjoerg
780*82d56013Sjoerg setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue,
781*82d56013Sjoerg Dest == LoopExit);
782*82d56013Sjoerg }
783*82d56013Sjoerg
784*82d56013Sjoerg if (!LatchIsExiting) {
785*82d56013Sjoerg // If the latch is not exiting, we may be able to simplify the conditional
786*82d56013Sjoerg // branches in the unrolled exiting blocks.
787*82d56013Sjoerg for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
7887330f729Sjoerg // The branch destination.
7897330f729Sjoerg unsigned j = (i + 1) % e;
7907330f729Sjoerg bool NeedConditional = true;
7917330f729Sjoerg
7927330f729Sjoerg if (RuntimeTripCount && j != 0)
7937330f729Sjoerg NeedConditional = false;
7947330f729Sjoerg
7957330f729Sjoerg if (CompletelyUnroll)
7967330f729Sjoerg // We cannot drop the conditional branch for the last condition, as we
7977330f729Sjoerg // may have to execute the loop body depending on the condition.
7987330f729Sjoerg NeedConditional = j == 0 || ULO.PreserveCondBr;
7997330f729Sjoerg else if (j != BreakoutTrip &&
8007330f729Sjoerg (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
8017330f729Sjoerg // If we know the trip count or a multiple of it, we can safely use an
8027330f729Sjoerg // unconditional branch for some iterations.
8037330f729Sjoerg NeedConditional = false;
8047330f729Sjoerg
805*82d56013Sjoerg // Conditional branches from non-latch exiting block have successors
806*82d56013Sjoerg // either in the same loop iteration or outside the loop. The branches are
807*82d56013Sjoerg // already correct.
808*82d56013Sjoerg if (NeedConditional)
809*82d56013Sjoerg continue;
810*82d56013Sjoerg setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional,
811*82d56013Sjoerg None, false);
8127330f729Sjoerg }
8137330f729Sjoerg
8147330f729Sjoerg // When completely unrolling, the last latch becomes unreachable.
815*82d56013Sjoerg if (CompletelyUnroll) {
816*82d56013Sjoerg BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator());
8177330f729Sjoerg new UnreachableInst(Term->getContext(), Term);
8187330f729Sjoerg Term->eraseFromParent();
8197330f729Sjoerg }
8207330f729Sjoerg }
8217330f729Sjoerg
8227330f729Sjoerg // Update dominators of blocks we might reach through exits.
8237330f729Sjoerg // Immediate dominator of such block might change, because we add more
8247330f729Sjoerg // routes which can lead to the exit: we can now reach it from the copied
8257330f729Sjoerg // iterations too.
8267330f729Sjoerg if (DT && ULO.Count > 1) {
8277330f729Sjoerg for (auto *BB : OriginalLoopBlocks) {
8287330f729Sjoerg auto *BBDomNode = DT->getNode(BB);
8297330f729Sjoerg SmallVector<BasicBlock *, 16> ChildrenToUpdate;
830*82d56013Sjoerg for (auto *ChildDomNode : BBDomNode->children()) {
8317330f729Sjoerg auto *ChildBB = ChildDomNode->getBlock();
8327330f729Sjoerg if (!L->contains(ChildBB))
8337330f729Sjoerg ChildrenToUpdate.push_back(ChildBB);
8347330f729Sjoerg }
8357330f729Sjoerg BasicBlock *NewIDom;
836*82d56013Sjoerg if (ExitingBI && BB == ExitingBlocks[0]) {
8377330f729Sjoerg // The latch is special because we emit unconditional branches in
8387330f729Sjoerg // some cases where the original loop contained a conditional branch.
8397330f729Sjoerg // Since the latch is always at the bottom of the loop, if the latch
8407330f729Sjoerg // dominated an exit before unrolling, the new dominator of that exit
8417330f729Sjoerg // must also be a latch. Specifically, the dominator is the first
8427330f729Sjoerg // latch which ends in a conditional branch, or the last latch if
8437330f729Sjoerg // there is no such latch.
844*82d56013Sjoerg // For loops exiting from non latch exiting block, we limit the
845*82d56013Sjoerg // branch simplification to single exiting block loops.
846*82d56013Sjoerg NewIDom = ExitingBlocks.back();
847*82d56013Sjoerg for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
848*82d56013Sjoerg Instruction *Term = ExitingBlocks[i]->getTerminator();
8497330f729Sjoerg if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
850*82d56013Sjoerg NewIDom =
851*82d56013Sjoerg DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]);
8527330f729Sjoerg break;
8537330f729Sjoerg }
8547330f729Sjoerg }
8557330f729Sjoerg } else {
8567330f729Sjoerg // The new idom of the block will be the nearest common dominator
8577330f729Sjoerg // of all copies of the previous idom. This is equivalent to the
8587330f729Sjoerg // nearest common dominator of the previous idom and the first latch,
8597330f729Sjoerg // which dominates all copies of the previous idom.
8607330f729Sjoerg NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
8617330f729Sjoerg }
8627330f729Sjoerg for (auto *ChildBB : ChildrenToUpdate)
8637330f729Sjoerg DT->changeImmediateDominator(ChildBB, NewIDom);
8647330f729Sjoerg }
8657330f729Sjoerg }
8667330f729Sjoerg
8677330f729Sjoerg assert(!DT || !UnrollVerifyDomtree ||
8687330f729Sjoerg DT->verify(DominatorTree::VerificationLevel::Fast));
8697330f729Sjoerg
8707330f729Sjoerg DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
8717330f729Sjoerg // Merge adjacent basic blocks, if possible.
8727330f729Sjoerg for (BasicBlock *Latch : Latches) {
8737330f729Sjoerg BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
8747330f729Sjoerg assert((Term ||
8757330f729Sjoerg (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
8767330f729Sjoerg "Need a branch as terminator, except when fully unrolling with "
8777330f729Sjoerg "unconditional latch");
8787330f729Sjoerg if (Term && Term->isUnconditional()) {
8797330f729Sjoerg BasicBlock *Dest = Term->getSuccessor(0);
8807330f729Sjoerg BasicBlock *Fold = Dest->getUniquePredecessor();
8817330f729Sjoerg if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
8827330f729Sjoerg // Dest has been folded into Fold. Update our worklists accordingly.
8837330f729Sjoerg std::replace(Latches.begin(), Latches.end(), Dest, Fold);
884*82d56013Sjoerg llvm::erase_value(UnrolledLoopBlocks, Dest);
8857330f729Sjoerg }
8867330f729Sjoerg }
8877330f729Sjoerg }
8887330f729Sjoerg // Apply updates to the DomTree.
8897330f729Sjoerg DT = &DTU.getDomTree();
8907330f729Sjoerg
8917330f729Sjoerg // At this point, the code is well formed. We now simplify the unrolled loop,
8927330f729Sjoerg // doing constant propagation and dead code elimination as we go.
8937330f729Sjoerg simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
894*82d56013Sjoerg SE, DT, AC, TTI);
8957330f729Sjoerg
8967330f729Sjoerg NumCompletelyUnrolled += CompletelyUnroll;
8977330f729Sjoerg ++NumUnrolled;
8987330f729Sjoerg
8997330f729Sjoerg Loop *OuterL = L->getParentLoop();
9007330f729Sjoerg // Update LoopInfo if the loop is completely removed.
9017330f729Sjoerg if (CompletelyUnroll)
9027330f729Sjoerg LI->erase(L);
9037330f729Sjoerg
9047330f729Sjoerg // After complete unrolling most of the blocks should be contained in OuterL.
9057330f729Sjoerg // However, some of them might happen to be out of OuterL (e.g. if they
9067330f729Sjoerg // precede a loop exit). In this case we might need to insert PHI nodes in
9077330f729Sjoerg // order to preserve LCSSA form.
9087330f729Sjoerg // We don't need to check this if we already know that we need to fix LCSSA
9097330f729Sjoerg // form.
9107330f729Sjoerg // TODO: For now we just recompute LCSSA for the outer loop in this case, but
9117330f729Sjoerg // it should be possible to fix it in-place.
9127330f729Sjoerg if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
9137330f729Sjoerg NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
9147330f729Sjoerg
9157330f729Sjoerg // If we have a pass and a DominatorTree we should re-simplify impacted loops
9167330f729Sjoerg // to ensure subsequent analyses can rely on this form. We want to simplify
9177330f729Sjoerg // at least one layer outside of the loop that was unrolled so that any
9187330f729Sjoerg // changes to the parent loop exposed by the unrolling are considered.
9197330f729Sjoerg if (DT) {
9207330f729Sjoerg if (OuterL) {
9217330f729Sjoerg // OuterL includes all loops for which we can break loop-simplify, so
9227330f729Sjoerg // it's sufficient to simplify only it (it'll recursively simplify inner
9237330f729Sjoerg // loops too).
9247330f729Sjoerg if (NeedToFixLCSSA) {
9257330f729Sjoerg // LCSSA must be performed on the outermost affected loop. The unrolled
9267330f729Sjoerg // loop's last loop latch is guaranteed to be in the outermost loop
9277330f729Sjoerg // after LoopInfo's been updated by LoopInfo::erase.
9287330f729Sjoerg Loop *LatchLoop = LI->getLoopFor(Latches.back());
9297330f729Sjoerg Loop *FixLCSSALoop = OuterL;
9307330f729Sjoerg if (!FixLCSSALoop->contains(LatchLoop))
9317330f729Sjoerg while (FixLCSSALoop->getParentLoop() != LatchLoop)
9327330f729Sjoerg FixLCSSALoop = FixLCSSALoop->getParentLoop();
9337330f729Sjoerg
9347330f729Sjoerg formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
9357330f729Sjoerg } else if (PreserveLCSSA) {
9367330f729Sjoerg assert(OuterL->isLCSSAForm(*DT) &&
9377330f729Sjoerg "Loops should be in LCSSA form after loop-unroll.");
9387330f729Sjoerg }
9397330f729Sjoerg
9407330f729Sjoerg // TODO: That potentially might be compile-time expensive. We should try
9417330f729Sjoerg // to fix the loop-simplified form incrementally.
9427330f729Sjoerg simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
9437330f729Sjoerg } else {
9447330f729Sjoerg // Simplify loops for which we might've broken loop-simplify form.
9457330f729Sjoerg for (Loop *SubLoop : LoopsToSimplify)
9467330f729Sjoerg simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
9477330f729Sjoerg }
9487330f729Sjoerg }
9497330f729Sjoerg
9507330f729Sjoerg return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
9517330f729Sjoerg : LoopUnrollResult::PartiallyUnrolled;
9527330f729Sjoerg }
9537330f729Sjoerg
9547330f729Sjoerg /// Given an llvm.loop loop id metadata node, returns the loop hint metadata
9557330f729Sjoerg /// node with the given name (for example, "llvm.loop.unroll.count"). If no
9567330f729Sjoerg /// such metadata node exists, then nullptr is returned.
GetUnrollMetadata(MDNode * LoopID,StringRef Name)9577330f729Sjoerg MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
9587330f729Sjoerg // First operand should refer to the loop id itself.
9597330f729Sjoerg assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
9607330f729Sjoerg assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
9617330f729Sjoerg
9627330f729Sjoerg for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
9637330f729Sjoerg MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
9647330f729Sjoerg if (!MD)
9657330f729Sjoerg continue;
9667330f729Sjoerg
9677330f729Sjoerg MDString *S = dyn_cast<MDString>(MD->getOperand(0));
9687330f729Sjoerg if (!S)
9697330f729Sjoerg continue;
9707330f729Sjoerg
9717330f729Sjoerg if (Name.equals(S->getString()))
9727330f729Sjoerg return MD;
9737330f729Sjoerg }
9747330f729Sjoerg return nullptr;
9757330f729Sjoerg }
976