10b57cec5SDimitry Andric //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements some loop unrolling utilities. It does not define any 100b57cec5SDimitry Andric // actual pass or policy, but provides a single function to perform loop 110b57cec5SDimitry Andric // unrolling. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The process of unrolling can produce extraneous basic blocks linked with 140b57cec5SDimitry Andric // unconditional branches. This will be corrected in the future. 150b57cec5SDimitry Andric // 160b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 170b57cec5SDimitry Andric 185ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h" 195ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h" 205ffd83dbSDimitry Andric #include "llvm/ADT/STLExtras.h" 21*0fca6ea1SDimitry Andric #include "llvm/ADT/ScopedHashTable.h" 225ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h" 235ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 240b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 255ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 265ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h" 275ffd83dbSDimitry Andric #include "llvm/ADT/ilist_iterator.h" 28*0fca6ea1SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 305ffd83dbSDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 310b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 325ffd83dbSDimitry Andric #include "llvm/Analysis/LoopInfo.h" 330b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h" 34*0fca6ea1SDimitry Andric #include "llvm/Analysis/MemorySSA.h" 350b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 360b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 370b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 385ffd83dbSDimitry Andric #include "llvm/IR/CFG.h" 395ffd83dbSDimitry Andric #include "llvm/IR/Constants.h" 400b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 415ffd83dbSDimitry Andric #include "llvm/IR/DebugLoc.h" 425ffd83dbSDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 430b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 445ffd83dbSDimitry Andric #include "llvm/IR/Function.h" 455ffd83dbSDimitry Andric #include "llvm/IR/Instruction.h" 465ffd83dbSDimitry Andric #include "llvm/IR/Instructions.h" 470b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 485ffd83dbSDimitry Andric #include "llvm/IR/Metadata.h" 495ffd83dbSDimitry Andric #include "llvm/IR/Module.h" 5006c3fb27SDimitry Andric #include "llvm/IR/PatternMatch.h" 515ffd83dbSDimitry Andric #include "llvm/IR/Use.h" 525ffd83dbSDimitry Andric #include "llvm/IR/User.h" 535ffd83dbSDimitry Andric #include "llvm/IR/ValueHandle.h" 545ffd83dbSDimitry Andric #include "llvm/IR/ValueMap.h" 555ffd83dbSDimitry Andric #include "llvm/Support/Casting.h" 56480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 570b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 585ffd83dbSDimitry Andric #include "llvm/Support/GenericDomTree.h" 595ffd83dbSDimitry Andric #include "llvm/Support/MathExtras.h" 600b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 610b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 620b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 63480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 640b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h" 650b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 660b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SimplifyIndVar.h" 670b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h" 685ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 695ffd83dbSDimitry Andric #include <algorithm> 705ffd83dbSDimitry Andric #include <assert.h> 71bdd1243dSDimitry Andric #include <numeric> 725ffd83dbSDimitry Andric #include <type_traits> 735ffd83dbSDimitry Andric #include <vector> 745ffd83dbSDimitry Andric 755ffd83dbSDimitry Andric namespace llvm { 765ffd83dbSDimitry Andric class DataLayout; 775ffd83dbSDimitry Andric class Value; 785ffd83dbSDimitry Andric } // namespace llvm 795ffd83dbSDimitry Andric 800b57cec5SDimitry Andric using namespace llvm; 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll" 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric // TODO: Should these be here or in LoopUnroll? 850b57cec5SDimitry Andric STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); 860b57cec5SDimitry Andric STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); 875ffd83dbSDimitry Andric STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional " 885ffd83dbSDimitry Andric "latch (completely or otherwise)"); 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric static cl::opt<bool> 910b57cec5SDimitry Andric UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, 920b57cec5SDimitry Andric cl::desc("Allow runtime unrolled loops to be unrolled " 930b57cec5SDimitry Andric "with epilog instead of prolog.")); 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric static cl::opt<bool> 960b57cec5SDimitry Andric UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, 970b57cec5SDimitry Andric cl::desc("Verify domtree after unrolling"), 980b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS 990b57cec5SDimitry Andric cl::init(true) 1000b57cec5SDimitry Andric #else 1010b57cec5SDimitry Andric cl::init(false) 1020b57cec5SDimitry Andric #endif 1030b57cec5SDimitry Andric ); 1040b57cec5SDimitry Andric 10504eeddc0SDimitry Andric static cl::opt<bool> 10604eeddc0SDimitry Andric UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, 10704eeddc0SDimitry Andric cl::desc("Verify loopinfo after unrolling"), 10804eeddc0SDimitry Andric #ifdef EXPENSIVE_CHECKS 10904eeddc0SDimitry Andric cl::init(true) 11004eeddc0SDimitry Andric #else 11104eeddc0SDimitry Andric cl::init(false) 11204eeddc0SDimitry Andric #endif 11304eeddc0SDimitry Andric ); 11404eeddc0SDimitry Andric 11504eeddc0SDimitry Andric 1160b57cec5SDimitry Andric /// Check if unrolling created a situation where we need to insert phi nodes to 1170b57cec5SDimitry Andric /// preserve LCSSA form. 1180b57cec5SDimitry Andric /// \param Blocks is a vector of basic blocks representing unrolled loop. 1190b57cec5SDimitry Andric /// \param L is the outer loop. 1200b57cec5SDimitry Andric /// It's possible that some of the blocks are in L, and some are not. In this 1210b57cec5SDimitry Andric /// case, if there is a use is outside L, and definition is inside L, we need to 1220b57cec5SDimitry Andric /// insert a phi-node, otherwise LCSSA will be broken. 1230b57cec5SDimitry Andric /// The function is just a helper function for llvm::UnrollLoop that returns 1240b57cec5SDimitry Andric /// true if this situation occurs, indicating that LCSSA needs to be fixed. 125e8d8bef9SDimitry Andric static bool needToInsertPhisForLCSSA(Loop *L, 126e8d8bef9SDimitry Andric const std::vector<BasicBlock *> &Blocks, 1270b57cec5SDimitry Andric LoopInfo *LI) { 1280b57cec5SDimitry Andric for (BasicBlock *BB : Blocks) { 1290b57cec5SDimitry Andric if (LI->getLoopFor(BB) == L) 1300b57cec5SDimitry Andric continue; 1310b57cec5SDimitry Andric for (Instruction &I : *BB) { 1320b57cec5SDimitry Andric for (Use &U : I.operands()) { 133e8d8bef9SDimitry Andric if (const auto *Def = dyn_cast<Instruction>(U)) { 1340b57cec5SDimitry Andric Loop *DefLoop = LI->getLoopFor(Def->getParent()); 1350b57cec5SDimitry Andric if (!DefLoop) 1360b57cec5SDimitry Andric continue; 1370b57cec5SDimitry Andric if (DefLoop->contains(L)) 1380b57cec5SDimitry Andric return true; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric return false; 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary 1470b57cec5SDimitry Andric /// and adds a mapping from the original loop to the new loop to NewLoops. 1480b57cec5SDimitry Andric /// Returns nullptr if no new loop was created and a pointer to the 1490b57cec5SDimitry Andric /// original loop OriginalBB was part of otherwise. 1500b57cec5SDimitry Andric const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, 1510b57cec5SDimitry Andric BasicBlock *ClonedBB, LoopInfo *LI, 1520b57cec5SDimitry Andric NewLoopsMap &NewLoops) { 1530b57cec5SDimitry Andric // Figure out which loop New is in. 1540b57cec5SDimitry Andric const Loop *OldLoop = LI->getLoopFor(OriginalBB); 1550b57cec5SDimitry Andric assert(OldLoop && "Should (at least) be in the loop being unrolled!"); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric Loop *&NewLoop = NewLoops[OldLoop]; 1580b57cec5SDimitry Andric if (!NewLoop) { 1590b57cec5SDimitry Andric // Found a new sub-loop. 1600b57cec5SDimitry Andric assert(OriginalBB == OldLoop->getHeader() && 1610b57cec5SDimitry Andric "Header should be first in RPO"); 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric NewLoop = LI->AllocateLoop(); 1640b57cec5SDimitry Andric Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric if (NewLoopParent) 1670b57cec5SDimitry Andric NewLoopParent->addChildLoop(NewLoop); 1680b57cec5SDimitry Andric else 1690b57cec5SDimitry Andric LI->addTopLevelLoop(NewLoop); 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric NewLoop->addBasicBlockToLoop(ClonedBB, *LI); 1720b57cec5SDimitry Andric return OldLoop; 1730b57cec5SDimitry Andric } else { 1740b57cec5SDimitry Andric NewLoop->addBasicBlockToLoop(ClonedBB, *LI); 1750b57cec5SDimitry Andric return nullptr; 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric } 1780b57cec5SDimitry Andric 1790b57cec5SDimitry Andric /// The function chooses which type of unroll (epilog or prolog) is more 1800b57cec5SDimitry Andric /// profitabale. 1810b57cec5SDimitry Andric /// Epilog unroll is more profitable when there is PHI that starts from 1820b57cec5SDimitry Andric /// constant. In this case epilog will leave PHI start from constant, 1830b57cec5SDimitry Andric /// but prolog will convert it to non-constant. 1840b57cec5SDimitry Andric /// 1850b57cec5SDimitry Andric /// loop: 1860b57cec5SDimitry Andric /// PN = PHI [I, Latch], [CI, PreHeader] 1870b57cec5SDimitry Andric /// I = foo(PN) 1880b57cec5SDimitry Andric /// ... 1890b57cec5SDimitry Andric /// 1900b57cec5SDimitry Andric /// Epilog unroll case. 1910b57cec5SDimitry Andric /// loop: 1920b57cec5SDimitry Andric /// PN = PHI [I2, Latch], [CI, PreHeader] 1930b57cec5SDimitry Andric /// I1 = foo(PN) 1940b57cec5SDimitry Andric /// I2 = foo(I1) 1950b57cec5SDimitry Andric /// ... 1960b57cec5SDimitry Andric /// Prolog unroll case. 1970b57cec5SDimitry Andric /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader] 1980b57cec5SDimitry Andric /// loop: 1990b57cec5SDimitry Andric /// PN = PHI [I2, Latch], [NewPN, PreHeader] 2000b57cec5SDimitry Andric /// I1 = foo(PN) 2010b57cec5SDimitry Andric /// I2 = foo(I1) 2020b57cec5SDimitry Andric /// ... 2030b57cec5SDimitry Andric /// 2040b57cec5SDimitry Andric static bool isEpilogProfitable(Loop *L) { 2050b57cec5SDimitry Andric BasicBlock *PreHeader = L->getLoopPreheader(); 2060b57cec5SDimitry Andric BasicBlock *Header = L->getHeader(); 2070b57cec5SDimitry Andric assert(PreHeader && Header); 2080b57cec5SDimitry Andric for (const PHINode &PN : Header->phis()) { 2090b57cec5SDimitry Andric if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader))) 2100b57cec5SDimitry Andric return true; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric return false; 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric 215*0fca6ea1SDimitry Andric struct LoadValue { 216*0fca6ea1SDimitry Andric Instruction *DefI = nullptr; 217*0fca6ea1SDimitry Andric unsigned Generation = 0; 218*0fca6ea1SDimitry Andric LoadValue() = default; 219*0fca6ea1SDimitry Andric LoadValue(Instruction *Inst, unsigned Generation) 220*0fca6ea1SDimitry Andric : DefI(Inst), Generation(Generation) {} 221*0fca6ea1SDimitry Andric }; 222*0fca6ea1SDimitry Andric 223*0fca6ea1SDimitry Andric class StackNode { 224*0fca6ea1SDimitry Andric ScopedHashTable<const SCEV *, LoadValue>::ScopeTy LoadScope; 225*0fca6ea1SDimitry Andric unsigned CurrentGeneration; 226*0fca6ea1SDimitry Andric unsigned ChildGeneration; 227*0fca6ea1SDimitry Andric DomTreeNode *Node; 228*0fca6ea1SDimitry Andric DomTreeNode::const_iterator ChildIter; 229*0fca6ea1SDimitry Andric DomTreeNode::const_iterator EndIter; 230*0fca6ea1SDimitry Andric bool Processed = false; 231*0fca6ea1SDimitry Andric 232*0fca6ea1SDimitry Andric public: 233*0fca6ea1SDimitry Andric StackNode(ScopedHashTable<const SCEV *, LoadValue> &AvailableLoads, 234*0fca6ea1SDimitry Andric unsigned cg, DomTreeNode *N, DomTreeNode::const_iterator Child, 235*0fca6ea1SDimitry Andric DomTreeNode::const_iterator End) 236*0fca6ea1SDimitry Andric : LoadScope(AvailableLoads), CurrentGeneration(cg), ChildGeneration(cg), 237*0fca6ea1SDimitry Andric Node(N), ChildIter(Child), EndIter(End) {} 238*0fca6ea1SDimitry Andric // Accessors. 239*0fca6ea1SDimitry Andric unsigned currentGeneration() const { return CurrentGeneration; } 240*0fca6ea1SDimitry Andric unsigned childGeneration() const { return ChildGeneration; } 241*0fca6ea1SDimitry Andric void childGeneration(unsigned generation) { ChildGeneration = generation; } 242*0fca6ea1SDimitry Andric DomTreeNode *node() { return Node; } 243*0fca6ea1SDimitry Andric DomTreeNode::const_iterator childIter() const { return ChildIter; } 244*0fca6ea1SDimitry Andric 245*0fca6ea1SDimitry Andric DomTreeNode *nextChild() { 246*0fca6ea1SDimitry Andric DomTreeNode *Child = *ChildIter; 247*0fca6ea1SDimitry Andric ++ChildIter; 248*0fca6ea1SDimitry Andric return Child; 249*0fca6ea1SDimitry Andric } 250*0fca6ea1SDimitry Andric 251*0fca6ea1SDimitry Andric DomTreeNode::const_iterator end() const { return EndIter; } 252*0fca6ea1SDimitry Andric bool isProcessed() const { return Processed; } 253*0fca6ea1SDimitry Andric void process() { Processed = true; } 254*0fca6ea1SDimitry Andric }; 255*0fca6ea1SDimitry Andric 256*0fca6ea1SDimitry Andric Value *getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration, 257*0fca6ea1SDimitry Andric BatchAAResults &BAA, 258*0fca6ea1SDimitry Andric function_ref<MemorySSA *()> GetMSSA) { 259*0fca6ea1SDimitry Andric if (!LV.DefI) 260*0fca6ea1SDimitry Andric return nullptr; 261*0fca6ea1SDimitry Andric if (LV.DefI->getType() != LI->getType()) 262*0fca6ea1SDimitry Andric return nullptr; 263*0fca6ea1SDimitry Andric if (LV.Generation != CurrentGeneration) { 264*0fca6ea1SDimitry Andric MemorySSA *MSSA = GetMSSA(); 265*0fca6ea1SDimitry Andric if (!MSSA) 266*0fca6ea1SDimitry Andric return nullptr; 267*0fca6ea1SDimitry Andric auto *EarlierMA = MSSA->getMemoryAccess(LV.DefI); 268*0fca6ea1SDimitry Andric MemoryAccess *LaterDef = 269*0fca6ea1SDimitry Andric MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA); 270*0fca6ea1SDimitry Andric if (!MSSA->dominates(LaterDef, EarlierMA)) 271*0fca6ea1SDimitry Andric return nullptr; 272*0fca6ea1SDimitry Andric } 273*0fca6ea1SDimitry Andric return LV.DefI; 274*0fca6ea1SDimitry Andric } 275*0fca6ea1SDimitry Andric 276*0fca6ea1SDimitry Andric void loadCSE(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, 277*0fca6ea1SDimitry Andric BatchAAResults &BAA, function_ref<MemorySSA *()> GetMSSA) { 278*0fca6ea1SDimitry Andric ScopedHashTable<const SCEV *, LoadValue> AvailableLoads; 279*0fca6ea1SDimitry Andric SmallVector<std::unique_ptr<StackNode>> NodesToProcess; 280*0fca6ea1SDimitry Andric DomTreeNode *HeaderD = DT.getNode(L->getHeader()); 281*0fca6ea1SDimitry Andric NodesToProcess.emplace_back(new StackNode(AvailableLoads, 0, HeaderD, 282*0fca6ea1SDimitry Andric HeaderD->begin(), HeaderD->end())); 283*0fca6ea1SDimitry Andric 284*0fca6ea1SDimitry Andric unsigned CurrentGeneration = 0; 285*0fca6ea1SDimitry Andric while (!NodesToProcess.empty()) { 286*0fca6ea1SDimitry Andric StackNode *NodeToProcess = &*NodesToProcess.back(); 287*0fca6ea1SDimitry Andric 288*0fca6ea1SDimitry Andric CurrentGeneration = NodeToProcess->currentGeneration(); 289*0fca6ea1SDimitry Andric 290*0fca6ea1SDimitry Andric if (!NodeToProcess->isProcessed()) { 291*0fca6ea1SDimitry Andric // Process the node. 292*0fca6ea1SDimitry Andric 293*0fca6ea1SDimitry Andric // If this block has a single predecessor, then the predecessor is the 294*0fca6ea1SDimitry Andric // parent 295*0fca6ea1SDimitry Andric // of the domtree node and all of the live out memory values are still 296*0fca6ea1SDimitry Andric // current in this block. If this block has multiple predecessors, then 297*0fca6ea1SDimitry Andric // they could have invalidated the live-out memory values of our parent 298*0fca6ea1SDimitry Andric // value. For now, just be conservative and invalidate memory if this 299*0fca6ea1SDimitry Andric // block has multiple predecessors. 300*0fca6ea1SDimitry Andric if (!NodeToProcess->node()->getBlock()->getSinglePredecessor()) 301*0fca6ea1SDimitry Andric ++CurrentGeneration; 302*0fca6ea1SDimitry Andric for (auto &I : make_early_inc_range(*NodeToProcess->node()->getBlock())) { 303*0fca6ea1SDimitry Andric 304*0fca6ea1SDimitry Andric auto *Load = dyn_cast<LoadInst>(&I); 305*0fca6ea1SDimitry Andric if (!Load || !Load->isSimple()) { 306*0fca6ea1SDimitry Andric if (I.mayWriteToMemory()) 307*0fca6ea1SDimitry Andric CurrentGeneration++; 308*0fca6ea1SDimitry Andric continue; 309*0fca6ea1SDimitry Andric } 310*0fca6ea1SDimitry Andric 311*0fca6ea1SDimitry Andric const SCEV *PtrSCEV = SE.getSCEV(Load->getPointerOperand()); 312*0fca6ea1SDimitry Andric LoadValue LV = AvailableLoads.lookup(PtrSCEV); 313*0fca6ea1SDimitry Andric if (Value *M = 314*0fca6ea1SDimitry Andric getMatchingValue(LV, Load, CurrentGeneration, BAA, GetMSSA)) { 315*0fca6ea1SDimitry Andric if (LI.replacementPreservesLCSSAForm(Load, M)) { 316*0fca6ea1SDimitry Andric Load->replaceAllUsesWith(M); 317*0fca6ea1SDimitry Andric Load->eraseFromParent(); 318*0fca6ea1SDimitry Andric } 319*0fca6ea1SDimitry Andric } else { 320*0fca6ea1SDimitry Andric AvailableLoads.insert(PtrSCEV, LoadValue(Load, CurrentGeneration)); 321*0fca6ea1SDimitry Andric } 322*0fca6ea1SDimitry Andric } 323*0fca6ea1SDimitry Andric NodeToProcess->childGeneration(CurrentGeneration); 324*0fca6ea1SDimitry Andric NodeToProcess->process(); 325*0fca6ea1SDimitry Andric } else if (NodeToProcess->childIter() != NodeToProcess->end()) { 326*0fca6ea1SDimitry Andric // Push the next child onto the stack. 327*0fca6ea1SDimitry Andric DomTreeNode *Child = NodeToProcess->nextChild(); 328*0fca6ea1SDimitry Andric if (!L->contains(Child->getBlock())) 329*0fca6ea1SDimitry Andric continue; 330*0fca6ea1SDimitry Andric NodesToProcess.emplace_back( 331*0fca6ea1SDimitry Andric new StackNode(AvailableLoads, NodeToProcess->childGeneration(), Child, 332*0fca6ea1SDimitry Andric Child->begin(), Child->end())); 333*0fca6ea1SDimitry Andric } else { 334*0fca6ea1SDimitry Andric // It has been processed, and there are no more children to process, 335*0fca6ea1SDimitry Andric // so delete it and pop it off the stack. 336*0fca6ea1SDimitry Andric NodesToProcess.pop_back(); 337*0fca6ea1SDimitry Andric } 338*0fca6ea1SDimitry Andric } 339*0fca6ea1SDimitry Andric } 340*0fca6ea1SDimitry Andric 3410b57cec5SDimitry Andric /// Perform some cleanup and simplifications on loops after unrolling. It is 3420b57cec5SDimitry Andric /// useful to simplify the IV's in the new loop, as well as do a quick 3430b57cec5SDimitry Andric /// simplify/dce pass of the instructions. 3440b57cec5SDimitry Andric void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, 3450b57cec5SDimitry Andric ScalarEvolution *SE, DominatorTree *DT, 3465ffd83dbSDimitry Andric AssumptionCache *AC, 347*0fca6ea1SDimitry Andric const TargetTransformInfo *TTI, 348*0fca6ea1SDimitry Andric AAResults *AA) { 34906c3fb27SDimitry Andric using namespace llvm::PatternMatch; 35006c3fb27SDimitry Andric 3510b57cec5SDimitry Andric // Simplify any new induction variables in the partially unrolled loop. 3520b57cec5SDimitry Andric if (SE && SimplifyIVs) { 3530b57cec5SDimitry Andric SmallVector<WeakTrackingVH, 16> DeadInsts; 3545ffd83dbSDimitry Andric simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts); 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric // Aggressively clean up dead instructions that simplifyLoopIVs already 3570b57cec5SDimitry Andric // identified. Any remaining should be cleaned up below. 3585ffd83dbSDimitry Andric while (!DeadInsts.empty()) { 3595ffd83dbSDimitry Andric Value *V = DeadInsts.pop_back_val(); 3605ffd83dbSDimitry Andric if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) 3610b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Inst); 3620b57cec5SDimitry Andric } 363*0fca6ea1SDimitry Andric 364*0fca6ea1SDimitry Andric if (AA) { 365*0fca6ea1SDimitry Andric std::unique_ptr<MemorySSA> MSSA = nullptr; 366*0fca6ea1SDimitry Andric BatchAAResults BAA(*AA); 367*0fca6ea1SDimitry Andric loadCSE(L, *DT, *SE, *LI, BAA, [L, AA, DT, &MSSA]() -> MemorySSA * { 368*0fca6ea1SDimitry Andric if (!MSSA) 369*0fca6ea1SDimitry Andric MSSA.reset(new MemorySSA(*L, AA, DT)); 370*0fca6ea1SDimitry Andric return &*MSSA; 371*0fca6ea1SDimitry Andric }); 372*0fca6ea1SDimitry Andric } 3735ffd83dbSDimitry Andric } 3740b57cec5SDimitry Andric 375fe6060f1SDimitry Andric // At this point, the code is well formed. Perform constprop, instsimplify, 376fe6060f1SDimitry Andric // and dce. 377*0fca6ea1SDimitry Andric const DataLayout &DL = L->getHeader()->getDataLayout(); 378fe6060f1SDimitry Andric SmallVector<WeakTrackingVH, 16> DeadInsts; 3790b57cec5SDimitry Andric for (BasicBlock *BB : L->getBlocks()) { 380*0fca6ea1SDimitry Andric // Remove repeated debug instructions after loop unrolling. 381*0fca6ea1SDimitry Andric if (BB->getParent()->getSubprogram()) 382*0fca6ea1SDimitry Andric RemoveRedundantDbgInstrs(BB); 383*0fca6ea1SDimitry Andric 384349cc55cSDimitry Andric for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { 38581ad6265SDimitry Andric if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) 386349cc55cSDimitry Andric if (LI->replacementPreservesLCSSAForm(&Inst, V)) 387349cc55cSDimitry Andric Inst.replaceAllUsesWith(V); 388349cc55cSDimitry Andric if (isInstructionTriviallyDead(&Inst)) 389349cc55cSDimitry Andric DeadInsts.emplace_back(&Inst); 39006c3fb27SDimitry Andric 39106c3fb27SDimitry Andric // Fold ((add X, C1), C2) to (add X, C1+C2). This is very common in 39206c3fb27SDimitry Andric // unrolled loops, and handling this early allows following code to 39306c3fb27SDimitry Andric // identify the IV as a "simple recurrence" without first folding away 39406c3fb27SDimitry Andric // a long chain of adds. 39506c3fb27SDimitry Andric { 39606c3fb27SDimitry Andric Value *X; 39706c3fb27SDimitry Andric const APInt *C1, *C2; 39806c3fb27SDimitry Andric if (match(&Inst, m_Add(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)))) { 39906c3fb27SDimitry Andric auto *InnerI = dyn_cast<Instruction>(Inst.getOperand(0)); 40006c3fb27SDimitry Andric auto *InnerOBO = cast<OverflowingBinaryOperator>(Inst.getOperand(0)); 40106c3fb27SDimitry Andric bool SignedOverflow; 40206c3fb27SDimitry Andric APInt NewC = C1->sadd_ov(*C2, SignedOverflow); 40306c3fb27SDimitry Andric Inst.setOperand(0, X); 40406c3fb27SDimitry Andric Inst.setOperand(1, ConstantInt::get(Inst.getType(), NewC)); 40506c3fb27SDimitry Andric Inst.setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap() && 40606c3fb27SDimitry Andric InnerOBO->hasNoUnsignedWrap()); 40706c3fb27SDimitry Andric Inst.setHasNoSignedWrap(Inst.hasNoSignedWrap() && 40806c3fb27SDimitry Andric InnerOBO->hasNoSignedWrap() && 40906c3fb27SDimitry Andric !SignedOverflow); 41006c3fb27SDimitry Andric if (InnerI && isInstructionTriviallyDead(InnerI)) 41106c3fb27SDimitry Andric DeadInsts.emplace_back(InnerI); 41206c3fb27SDimitry Andric } 41306c3fb27SDimitry Andric } 4140b57cec5SDimitry Andric } 415fe6060f1SDimitry Andric // We can't do recursive deletion until we're done iterating, as we might 416fe6060f1SDimitry Andric // have a phi which (potentially indirectly) uses instructions later in 417fe6060f1SDimitry Andric // the block we're iterating through. 418fe6060f1SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 422*0fca6ea1SDimitry Andric // Loops containing convergent instructions that are uncontrolled or controlled 423*0fca6ea1SDimitry Andric // from outside the loop must have a count that divides their TripMultiple. 424*0fca6ea1SDimitry Andric LLVM_ATTRIBUTE_USED 425*0fca6ea1SDimitry Andric static bool canHaveUnrollRemainder(const Loop *L) { 426*0fca6ea1SDimitry Andric if (getLoopConvergenceHeart(L)) 427*0fca6ea1SDimitry Andric return false; 428*0fca6ea1SDimitry Andric 429*0fca6ea1SDimitry Andric // Check for uncontrolled convergent operations. 430*0fca6ea1SDimitry Andric for (auto &BB : L->blocks()) { 431*0fca6ea1SDimitry Andric for (auto &I : *BB) { 432*0fca6ea1SDimitry Andric if (isa<ConvergenceControlInst>(I)) 433*0fca6ea1SDimitry Andric return true; 434*0fca6ea1SDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) 435*0fca6ea1SDimitry Andric if (CB->isConvergent()) 436*0fca6ea1SDimitry Andric return CB->getConvergenceControlToken(); 437*0fca6ea1SDimitry Andric } 438*0fca6ea1SDimitry Andric } 439*0fca6ea1SDimitry Andric return true; 440*0fca6ea1SDimitry Andric } 441*0fca6ea1SDimitry Andric 4420b57cec5SDimitry Andric /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling 4430b57cec5SDimitry Andric /// can only fail when the loop's latch block is not terminated by a conditional 4440b57cec5SDimitry Andric /// branch instruction. However, if the trip count (and multiple) are not known, 4450b57cec5SDimitry Andric /// loop unrolling will mostly produce more code that is no faster. 4460b57cec5SDimitry Andric /// 447fe6060f1SDimitry Andric /// If Runtime is true then UnrollLoop will try to insert a prologue or 448fe6060f1SDimitry Andric /// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop 449fe6060f1SDimitry Andric /// will not runtime-unroll the loop if computing the run-time trip count will 450fe6060f1SDimitry Andric /// be expensive and AllowExpensiveTripCount is false. 4510b57cec5SDimitry Andric /// 4520b57cec5SDimitry Andric /// The LoopInfo Analysis that is passed will be kept consistent. 4530b57cec5SDimitry Andric /// 4540b57cec5SDimitry Andric /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and 4550b57cec5SDimitry Andric /// DominatorTree if they are non-null. 4560b57cec5SDimitry Andric /// 4570b57cec5SDimitry Andric /// If RemainderLoop is non-null, it will receive the remainder loop (if 4580b57cec5SDimitry Andric /// required and not fully unrolled). 459*0fca6ea1SDimitry Andric LoopUnrollResult 460*0fca6ea1SDimitry Andric llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, 461*0fca6ea1SDimitry Andric ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, 462*0fca6ea1SDimitry Andric const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, 463*0fca6ea1SDimitry Andric bool PreserveLCSSA, Loop **RemainderLoop, AAResults *AA) { 464fe6060f1SDimitry Andric assert(DT && "DomTree is required"); 4650b57cec5SDimitry Andric 466e8d8bef9SDimitry Andric if (!L->getLoopPreheader()) { 4670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); 4680b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric 471e8d8bef9SDimitry Andric if (!L->getLoopLatch()) { 4720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); 4730b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric // Loops with indirectbr cannot be cloned. 4770b57cec5SDimitry Andric if (!L->isSafeToClone()) { 4780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); 4790b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 482e8d8bef9SDimitry Andric if (L->getHeader()->hasAddressTaken()) { 4830b57cec5SDimitry Andric // The loop-rotate pass can be helpful to avoid this in many cases. 4840b57cec5SDimitry Andric LLVM_DEBUG( 4850b57cec5SDimitry Andric dbgs() << " Won't unroll loop: address of header block is taken.\n"); 4860b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 4870b57cec5SDimitry Andric } 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric assert(ULO.Count > 0); 4900b57cec5SDimitry Andric 491e8d8bef9SDimitry Andric // All these values should be taken only after peeling because they might have 492e8d8bef9SDimitry Andric // changed. 493e8d8bef9SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader(); 494e8d8bef9SDimitry Andric BasicBlock *Header = L->getHeader(); 495e8d8bef9SDimitry Andric BasicBlock *LatchBlock = L->getLoopLatch(); 496e8d8bef9SDimitry Andric SmallVector<BasicBlock *, 4> ExitBlocks; 497e8d8bef9SDimitry Andric L->getExitBlocks(ExitBlocks); 498e8d8bef9SDimitry Andric std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); 499e8d8bef9SDimitry Andric 500fe6060f1SDimitry Andric const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); 501fe6060f1SDimitry Andric const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); 50206c3fb27SDimitry Andric unsigned EstimatedLoopInvocationWeight = 0; 50306c3fb27SDimitry Andric std::optional<unsigned> OriginalTripCount = 50406c3fb27SDimitry Andric llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight); 505fe6060f1SDimitry Andric 506fe6060f1SDimitry Andric // Effectively "DCE" unrolled iterations that are beyond the max tripcount 507fe6060f1SDimitry Andric // and will never be executed. 508fe6060f1SDimitry Andric if (MaxTripCount && ULO.Count > MaxTripCount) 509fe6060f1SDimitry Andric ULO.Count = MaxTripCount; 510fe6060f1SDimitry Andric 511fe6060f1SDimitry Andric struct ExitInfo { 512fe6060f1SDimitry Andric unsigned TripCount; 513fe6060f1SDimitry Andric unsigned TripMultiple; 514fe6060f1SDimitry Andric unsigned BreakoutTrip; 515fe6060f1SDimitry Andric bool ExitOnTrue; 516bdd1243dSDimitry Andric BasicBlock *FirstExitingBlock = nullptr; 517fe6060f1SDimitry Andric SmallVector<BasicBlock *> ExitingBlocks; 518fe6060f1SDimitry Andric }; 519fe6060f1SDimitry Andric DenseMap<BasicBlock *, ExitInfo> ExitInfos; 520fe6060f1SDimitry Andric SmallVector<BasicBlock *, 4> ExitingBlocks; 521fe6060f1SDimitry Andric L->getExitingBlocks(ExitingBlocks); 522fe6060f1SDimitry Andric for (auto *ExitingBlock : ExitingBlocks) { 523fe6060f1SDimitry Andric // The folding code is not prepared to deal with non-branch instructions 524fe6060f1SDimitry Andric // right now. 525fe6060f1SDimitry Andric auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); 526fe6060f1SDimitry Andric if (!BI) 527fe6060f1SDimitry Andric continue; 528fe6060f1SDimitry Andric 529fe6060f1SDimitry Andric ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second; 530fe6060f1SDimitry Andric Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); 531fe6060f1SDimitry Andric Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); 532fe6060f1SDimitry Andric if (Info.TripCount != 0) { 533fe6060f1SDimitry Andric Info.BreakoutTrip = Info.TripCount % ULO.Count; 534fe6060f1SDimitry Andric Info.TripMultiple = 0; 535fe6060f1SDimitry Andric } else { 536fe6060f1SDimitry Andric Info.BreakoutTrip = Info.TripMultiple = 537bdd1243dSDimitry Andric (unsigned)std::gcd(ULO.Count, Info.TripMultiple); 538fe6060f1SDimitry Andric } 539fe6060f1SDimitry Andric Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); 540fe6060f1SDimitry Andric Info.ExitingBlocks.push_back(ExitingBlock); 541fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() 542fe6060f1SDimitry Andric << ": TripCount=" << Info.TripCount 543fe6060f1SDimitry Andric << ", TripMultiple=" << Info.TripMultiple 544fe6060f1SDimitry Andric << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); 545fe6060f1SDimitry Andric } 546fe6060f1SDimitry Andric 547fe6060f1SDimitry Andric // Are we eliminating the loop control altogether? Note that we can know 548fe6060f1SDimitry Andric // we're eliminating the backedge without knowing exactly which iteration 549fe6060f1SDimitry Andric // of the unrolled body exits. 550fe6060f1SDimitry Andric const bool CompletelyUnroll = ULO.Count == MaxTripCount; 551fe6060f1SDimitry Andric 552fe6060f1SDimitry Andric const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; 553fe6060f1SDimitry Andric 554fe6060f1SDimitry Andric // There's no point in performing runtime unrolling if this unroll count 555fe6060f1SDimitry Andric // results in a full unroll. 556fe6060f1SDimitry Andric if (CompletelyUnroll) 557fe6060f1SDimitry Andric ULO.Runtime = false; 558fe6060f1SDimitry Andric 559e8d8bef9SDimitry Andric // Go through all exits of L and see if there are any phi-nodes there. We just 560e8d8bef9SDimitry Andric // conservatively assume that they're inserted to preserve LCSSA form, which 561e8d8bef9SDimitry Andric // means that complete unrolling might break this form. We need to either fix 562e8d8bef9SDimitry Andric // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For 563e8d8bef9SDimitry Andric // now we just recompute LCSSA for the outer loop, but it should be possible 564e8d8bef9SDimitry Andric // to fix it in-place. 565e8d8bef9SDimitry Andric bool NeedToFixLCSSA = 566e8d8bef9SDimitry Andric PreserveLCSSA && CompletelyUnroll && 567e8d8bef9SDimitry Andric any_of(ExitBlocks, 568e8d8bef9SDimitry Andric [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); 569e8d8bef9SDimitry Andric 570e8d8bef9SDimitry Andric // The current loop unroll pass can unroll loops that have 571e8d8bef9SDimitry Andric // (1) single latch; and 572e8d8bef9SDimitry Andric // (2a) latch is unconditional; or 573e8d8bef9SDimitry Andric // (2b) latch is conditional and is an exiting block 574e8d8bef9SDimitry Andric // FIXME: The implementation can be extended to work with more complicated 575e8d8bef9SDimitry Andric // cases, e.g. loops with multiple latches. 576e8d8bef9SDimitry Andric BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); 577e8d8bef9SDimitry Andric 578e8d8bef9SDimitry Andric // A conditional branch which exits the loop, which can be optimized to an 579e8d8bef9SDimitry Andric // unconditional branch in the unrolled loop in some cases. 580e8d8bef9SDimitry Andric bool LatchIsExiting = L->isLoopExiting(LatchBlock); 581e8d8bef9SDimitry Andric if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { 582e8d8bef9SDimitry Andric LLVM_DEBUG( 583e8d8bef9SDimitry Andric dbgs() << "Can't unroll; a conditional latch must exit the loop"); 584e8d8bef9SDimitry Andric return LoopUnrollResult::Unmodified; 585e8d8bef9SDimitry Andric } 586e8d8bef9SDimitry Andric 587*0fca6ea1SDimitry Andric assert((!ULO.Runtime || canHaveUnrollRemainder(L)) && 588fe6060f1SDimitry Andric "Can't runtime unroll if loop contains a convergent operation."); 5890b57cec5SDimitry Andric 5900b57cec5SDimitry Andric bool EpilogProfitability = 5910b57cec5SDimitry Andric UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog 5920b57cec5SDimitry Andric : isEpilogProfitable(L); 5930b57cec5SDimitry Andric 594fe6060f1SDimitry Andric if (ULO.Runtime && 5950b57cec5SDimitry Andric !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, 5960b57cec5SDimitry Andric EpilogProfitability, ULO.UnrollRemainder, 5975ffd83dbSDimitry Andric ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, 5980b57cec5SDimitry Andric PreserveLCSSA, RemainderLoop)) { 5990b57cec5SDimitry Andric if (ULO.Force) 600fe6060f1SDimitry Andric ULO.Runtime = false; 6010b57cec5SDimitry Andric else { 6020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " 6030b57cec5SDimitry Andric "generated when assuming runtime trip count\n"); 6040b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 6050b57cec5SDimitry Andric } 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric using namespace ore; 6090b57cec5SDimitry Andric // Report the unrolling decision. 6100b57cec5SDimitry Andric if (CompletelyUnroll) { 6110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() 612fe6060f1SDimitry Andric << " with trip count " << ULO.Count << "!\n"); 6130b57cec5SDimitry Andric if (ORE) 6140b57cec5SDimitry Andric ORE->emit([&]() { 6150b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), 6160b57cec5SDimitry Andric L->getHeader()) 6170b57cec5SDimitry Andric << "completely unrolled loop with " 618fe6060f1SDimitry Andric << NV("UnrollCount", ULO.Count) << " iterations"; 6190b57cec5SDimitry Andric }); 6200b57cec5SDimitry Andric } else { 6210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " 6220b57cec5SDimitry Andric << ULO.Count); 623fe6060f1SDimitry Andric if (ULO.Runtime) 6240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with run-time trip count"); 6250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "!\n"); 626fe6060f1SDimitry Andric 627fe6060f1SDimitry Andric if (ORE) 628fe6060f1SDimitry Andric ORE->emit([&]() { 629fe6060f1SDimitry Andric OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), 630fe6060f1SDimitry Andric L->getHeader()); 631fe6060f1SDimitry Andric Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); 632fe6060f1SDimitry Andric if (ULO.Runtime) 633fe6060f1SDimitry Andric Diag << " with run-time trip count"; 634fe6060f1SDimitry Andric return Diag; 635fe6060f1SDimitry Andric }); 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric 6380b57cec5SDimitry Andric // We are going to make changes to this loop. SCEV may be keeping cached info 6390b57cec5SDimitry Andric // about it, in particular about backedge taken count. The changes we make 6400b57cec5SDimitry Andric // are guaranteed to invalidate this information for our loop. It is tempting 6410b57cec5SDimitry Andric // to only invalidate the loop being unrolled, but it is incorrect as long as 6420b57cec5SDimitry Andric // all exiting branches from all inner loops have impact on the outer loops, 6430b57cec5SDimitry Andric // and if something changes inside them then any of outer loops may also 6440b57cec5SDimitry Andric // change. When we forget outermost loop, we also forget all contained loops 6450b57cec5SDimitry Andric // and this is what we need here. 6460b57cec5SDimitry Andric if (SE) { 6470b57cec5SDimitry Andric if (ULO.ForgetAllSCEV) 6480b57cec5SDimitry Andric SE->forgetAllLoops(); 649bdd1243dSDimitry Andric else { 6500b57cec5SDimitry Andric SE->forgetTopmostLoop(L); 651bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 652bdd1243dSDimitry Andric } 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric 6555ffd83dbSDimitry Andric if (!LatchIsExiting) 6565ffd83dbSDimitry Andric ++NumUnrolledNotLatch; 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric // For the first iteration of the loop, we should use the precloned values for 6590b57cec5SDimitry Andric // PHI nodes. Insert associations now. 6600b57cec5SDimitry Andric ValueToValueMapTy LastValueMap; 6610b57cec5SDimitry Andric std::vector<PHINode*> OrigPHINode; 6620b57cec5SDimitry Andric for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 6630b57cec5SDimitry Andric OrigPHINode.push_back(cast<PHINode>(I)); 6640b57cec5SDimitry Andric } 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric std::vector<BasicBlock *> Headers; 6670b57cec5SDimitry Andric std::vector<BasicBlock *> Latches; 6680b57cec5SDimitry Andric Headers.push_back(Header); 6690b57cec5SDimitry Andric Latches.push_back(LatchBlock); 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric // The current on-the-fly SSA update requires blocks to be processed in 6720b57cec5SDimitry Andric // reverse postorder so that LastValueMap contains the correct value at each 6730b57cec5SDimitry Andric // exit. 6740b57cec5SDimitry Andric LoopBlocksDFS DFS(L); 6750b57cec5SDimitry Andric DFS.perform(LI); 6760b57cec5SDimitry Andric 6770b57cec5SDimitry Andric // Stash the DFS iterators before adding blocks to the loop. 6780b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); 6790b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); 6800b57cec5SDimitry Andric 6810b57cec5SDimitry Andric std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks(); 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric // Loop Unrolling might create new loops. While we do preserve LoopInfo, we 6840b57cec5SDimitry Andric // might break loop-simplified form for these loops (as they, e.g., would 6850b57cec5SDimitry Andric // share the same exit blocks). We'll keep track of loops for which we can 6860b57cec5SDimitry Andric // break this so that later we can re-simplify them. 6870b57cec5SDimitry Andric SmallSetVector<Loop *, 4> LoopsToSimplify; 6880b57cec5SDimitry Andric for (Loop *SubLoop : *L) 6890b57cec5SDimitry Andric LoopsToSimplify.insert(SubLoop); 6900b57cec5SDimitry Andric 691fe6060f1SDimitry Andric // When a FSDiscriminator is enabled, we don't need to add the multiply 692fe6060f1SDimitry Andric // factors to the discriminators. 693bdd1243dSDimitry Andric if (Header->getParent()->shouldEmitDebugInfoForProfiling() && 694bdd1243dSDimitry Andric !EnableFSDiscriminator) 6950b57cec5SDimitry Andric for (BasicBlock *BB : L->getBlocks()) 6960b57cec5SDimitry Andric for (Instruction &I : *BB) 69706c3fb27SDimitry Andric if (!I.isDebugOrPseudoInst()) 6980b57cec5SDimitry Andric if (const DILocation *DIL = I.getDebugLoc()) { 6990b57cec5SDimitry Andric auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count); 7000b57cec5SDimitry Andric if (NewDIL) 70181ad6265SDimitry Andric I.setDebugLoc(*NewDIL); 7020b57cec5SDimitry Andric else 7030b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 7040b57cec5SDimitry Andric << "Failed to create new discriminator: " 7050b57cec5SDimitry Andric << DIL->getFilename() << " Line: " << DIL->getLine()); 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric 708e8d8bef9SDimitry Andric // Identify what noalias metadata is inside the loop: if it is inside the 709e8d8bef9SDimitry Andric // loop, the associated metadata must be cloned for each iteration. 710e8d8bef9SDimitry Andric SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; 711e8d8bef9SDimitry Andric identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); 712e8d8bef9SDimitry Andric 713349cc55cSDimitry Andric // We place the unrolled iterations immediately after the original loop 714349cc55cSDimitry Andric // latch. This is a reasonable default placement if we don't have block 715349cc55cSDimitry Andric // frequencies, and if we do, well the layout will be adjusted later. 716349cc55cSDimitry Andric auto BlockInsertPt = std::next(LatchBlock->getIterator()); 7170b57cec5SDimitry Andric for (unsigned It = 1; It != ULO.Count; ++It) { 7185ffd83dbSDimitry Andric SmallVector<BasicBlock *, 8> NewBlocks; 7190b57cec5SDimitry Andric SmallDenseMap<const Loop *, Loop *, 4> NewLoops; 7200b57cec5SDimitry Andric NewLoops[L] = L; 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { 7230b57cec5SDimitry Andric ValueToValueMapTy VMap; 7240b57cec5SDimitry Andric BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); 725bdd1243dSDimitry Andric Header->getParent()->insert(BlockInsertPt, New); 7260b57cec5SDimitry Andric 7270b57cec5SDimitry Andric assert((*BB != Header || LI->getLoopFor(*BB) == L) && 7280b57cec5SDimitry Andric "Header should not be in a sub-loop"); 7290b57cec5SDimitry Andric // Tell LI about New. 7300b57cec5SDimitry Andric const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); 7310b57cec5SDimitry Andric if (OldLoop) 7320b57cec5SDimitry Andric LoopsToSimplify.insert(NewLoops[OldLoop]); 7330b57cec5SDimitry Andric 734*0fca6ea1SDimitry Andric if (*BB == Header) { 7350b57cec5SDimitry Andric // Loop over all of the PHI nodes in the block, changing them to use 7360b57cec5SDimitry Andric // the incoming values from the previous block. 7370b57cec5SDimitry Andric for (PHINode *OrigPHI : OrigPHINode) { 7380b57cec5SDimitry Andric PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]); 7390b57cec5SDimitry Andric Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); 7400b57cec5SDimitry Andric if (Instruction *InValI = dyn_cast<Instruction>(InVal)) 7410b57cec5SDimitry Andric if (It > 1 && L->contains(InValI)) 7420b57cec5SDimitry Andric InVal = LastValueMap[InValI]; 7430b57cec5SDimitry Andric VMap[OrigPHI] = InVal; 744bdd1243dSDimitry Andric NewPHI->eraseFromParent(); 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 747*0fca6ea1SDimitry Andric // Eliminate copies of the loop heart intrinsic, if any. 748*0fca6ea1SDimitry Andric if (ULO.Heart) { 749*0fca6ea1SDimitry Andric auto it = VMap.find(ULO.Heart); 750*0fca6ea1SDimitry Andric assert(it != VMap.end()); 751*0fca6ea1SDimitry Andric Instruction *heartCopy = cast<Instruction>(it->second); 752*0fca6ea1SDimitry Andric heartCopy->eraseFromParent(); 753*0fca6ea1SDimitry Andric VMap.erase(it); 754*0fca6ea1SDimitry Andric } 755*0fca6ea1SDimitry Andric } 756*0fca6ea1SDimitry Andric 7570b57cec5SDimitry Andric // Update our running map of newest clones 7580b57cec5SDimitry Andric LastValueMap[*BB] = New; 7590b57cec5SDimitry Andric for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); 7600b57cec5SDimitry Andric VI != VE; ++VI) 7610b57cec5SDimitry Andric LastValueMap[VI->first] = VI->second; 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric // Add phi entries for newly created values to all exit blocks. 7640b57cec5SDimitry Andric for (BasicBlock *Succ : successors(*BB)) { 7650b57cec5SDimitry Andric if (L->contains(Succ)) 7660b57cec5SDimitry Andric continue; 7670b57cec5SDimitry Andric for (PHINode &PHI : Succ->phis()) { 7680b57cec5SDimitry Andric Value *Incoming = PHI.getIncomingValueForBlock(*BB); 7690b57cec5SDimitry Andric ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); 7700b57cec5SDimitry Andric if (It != LastValueMap.end()) 7710b57cec5SDimitry Andric Incoming = It->second; 7720b57cec5SDimitry Andric PHI.addIncoming(Incoming, New); 773bdd1243dSDimitry Andric SE->forgetValue(&PHI); 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric } 7760b57cec5SDimitry Andric // Keep track of new headers and latches as we create them, so that 7770b57cec5SDimitry Andric // we can insert the proper branches later. 7780b57cec5SDimitry Andric if (*BB == Header) 7790b57cec5SDimitry Andric Headers.push_back(New); 7800b57cec5SDimitry Andric if (*BB == LatchBlock) 7810b57cec5SDimitry Andric Latches.push_back(New); 7820b57cec5SDimitry Andric 7835ffd83dbSDimitry Andric // Keep track of the exiting block and its successor block contained in 7845ffd83dbSDimitry Andric // the loop for the current iteration. 785fe6060f1SDimitry Andric auto ExitInfoIt = ExitInfos.find(*BB); 786fe6060f1SDimitry Andric if (ExitInfoIt != ExitInfos.end()) 787fe6060f1SDimitry Andric ExitInfoIt->second.ExitingBlocks.push_back(New); 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric NewBlocks.push_back(New); 7900b57cec5SDimitry Andric UnrolledLoopBlocks.push_back(New); 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric // Update DomTree: since we just copy the loop body, and each copy has a 7930b57cec5SDimitry Andric // dedicated entry block (copy of the header block), this header's copy 7940b57cec5SDimitry Andric // dominates all copied blocks. That means, dominance relations in the 7950b57cec5SDimitry Andric // copied body are the same as in the original body. 7960b57cec5SDimitry Andric if (*BB == Header) 7970b57cec5SDimitry Andric DT->addNewBlock(New, Latches[It - 1]); 7980b57cec5SDimitry Andric else { 7990b57cec5SDimitry Andric auto BBDomNode = DT->getNode(*BB); 8000b57cec5SDimitry Andric auto BBIDom = BBDomNode->getIDom(); 8010b57cec5SDimitry Andric BasicBlock *OriginalBBIDom = BBIDom->getBlock(); 8020b57cec5SDimitry Andric DT->addNewBlock( 8030b57cec5SDimitry Andric New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); 8040b57cec5SDimitry Andric } 8050b57cec5SDimitry Andric } 8060b57cec5SDimitry Andric 8070b57cec5SDimitry Andric // Remap all instructions in the most recent iteration 8085ffd83dbSDimitry Andric remapInstructionsInBlocks(NewBlocks, LastValueMap); 809fe6060f1SDimitry Andric for (BasicBlock *NewBlock : NewBlocks) 810fe6060f1SDimitry Andric for (Instruction &I : *NewBlock) 811fe6060f1SDimitry Andric if (auto *II = dyn_cast<AssumeInst>(&I)) 8120b57cec5SDimitry Andric AC->registerAssumption(II); 813e8d8bef9SDimitry Andric 814e8d8bef9SDimitry Andric { 815e8d8bef9SDimitry Andric // Identify what other metadata depends on the cloned version. After 816e8d8bef9SDimitry Andric // cloning, replace the metadata with the corrected version for both 817e8d8bef9SDimitry Andric // memory instructions and noalias intrinsics. 818e8d8bef9SDimitry Andric std::string ext = (Twine("It") + Twine(It)).str(); 819e8d8bef9SDimitry Andric cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, 820e8d8bef9SDimitry Andric Header->getContext(), ext); 821e8d8bef9SDimitry Andric } 8220b57cec5SDimitry Andric } 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric // Loop over the PHI nodes in the original block, setting incoming values. 8250b57cec5SDimitry Andric for (PHINode *PN : OrigPHINode) { 8260b57cec5SDimitry Andric if (CompletelyUnroll) { 8270b57cec5SDimitry Andric PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); 828bdd1243dSDimitry Andric PN->eraseFromParent(); 8290b57cec5SDimitry Andric } else if (ULO.Count > 1) { 8300b57cec5SDimitry Andric Value *InVal = PN->removeIncomingValue(LatchBlock, false); 8310b57cec5SDimitry Andric // If this value was defined in the loop, take the value defined by the 8320b57cec5SDimitry Andric // last iteration of the loop. 8330b57cec5SDimitry Andric if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { 8340b57cec5SDimitry Andric if (L->contains(InValI)) 8350b57cec5SDimitry Andric InVal = LastValueMap[InVal]; 8360b57cec5SDimitry Andric } 8370b57cec5SDimitry Andric assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); 8380b57cec5SDimitry Andric PN->addIncoming(InVal, Latches.back()); 8390b57cec5SDimitry Andric } 8400b57cec5SDimitry Andric } 8410b57cec5SDimitry Andric 8425ffd83dbSDimitry Andric // Connect latches of the unrolled iterations to the headers of the next 843fe6060f1SDimitry Andric // iteration. Currently they point to the header of the same iteration. 8440b57cec5SDimitry Andric for (unsigned i = 0, e = Latches.size(); i != e; ++i) { 8450b57cec5SDimitry Andric unsigned j = (i + 1) % e; 846fe6060f1SDimitry Andric Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]); 8470b57cec5SDimitry Andric } 8480b57cec5SDimitry Andric 8490b57cec5SDimitry Andric // Update dominators of blocks we might reach through exits. 8500b57cec5SDimitry Andric // Immediate dominator of such block might change, because we add more 8510b57cec5SDimitry Andric // routes which can lead to the exit: we can now reach it from the copied 8520b57cec5SDimitry Andric // iterations too. 853fe6060f1SDimitry Andric if (ULO.Count > 1) { 8540b57cec5SDimitry Andric for (auto *BB : OriginalLoopBlocks) { 8550b57cec5SDimitry Andric auto *BBDomNode = DT->getNode(BB); 8560b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> ChildrenToUpdate; 8575ffd83dbSDimitry Andric for (auto *ChildDomNode : BBDomNode->children()) { 8580b57cec5SDimitry Andric auto *ChildBB = ChildDomNode->getBlock(); 8590b57cec5SDimitry Andric if (!L->contains(ChildBB)) 8600b57cec5SDimitry Andric ChildrenToUpdate.push_back(ChildBB); 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric // The new idom of the block will be the nearest common dominator 8630b57cec5SDimitry Andric // of all copies of the previous idom. This is equivalent to the 8640b57cec5SDimitry Andric // nearest common dominator of the previous idom and the first latch, 8650b57cec5SDimitry Andric // which dominates all copies of the previous idom. 866fe6060f1SDimitry Andric BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); 8670b57cec5SDimitry Andric for (auto *ChildBB : ChildrenToUpdate) 8680b57cec5SDimitry Andric DT->changeImmediateDominator(ChildBB, NewIDom); 8690b57cec5SDimitry Andric } 8700b57cec5SDimitry Andric } 8710b57cec5SDimitry Andric 872fe6060f1SDimitry Andric assert(!UnrollVerifyDomtree || 8730b57cec5SDimitry Andric DT->verify(DominatorTree::VerificationLevel::Fast)); 8740b57cec5SDimitry Andric 875bdd1243dSDimitry Andric SmallVector<DominatorTree::UpdateType> DTUpdates; 876fe6060f1SDimitry Andric auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) { 877fe6060f1SDimitry Andric auto *Term = cast<BranchInst>(Src->getTerminator()); 878fe6060f1SDimitry Andric const unsigned Idx = ExitOnTrue ^ WillExit; 879fe6060f1SDimitry Andric BasicBlock *Dest = Term->getSuccessor(Idx); 880fe6060f1SDimitry Andric BasicBlock *DeadSucc = Term->getSuccessor(1-Idx); 881fe6060f1SDimitry Andric 882fe6060f1SDimitry Andric // Remove predecessors from all non-Dest successors. 883fe6060f1SDimitry Andric DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true); 884fe6060f1SDimitry Andric 885fe6060f1SDimitry Andric // Replace the conditional branch with an unconditional one. 886*0fca6ea1SDimitry Andric BranchInst::Create(Dest, Term->getIterator()); 887fe6060f1SDimitry Andric Term->eraseFromParent(); 888fe6060f1SDimitry Andric 889bdd1243dSDimitry Andric DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc); 890fe6060f1SDimitry Andric }; 891fe6060f1SDimitry Andric 892fe6060f1SDimitry Andric auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, 893bdd1243dSDimitry Andric bool IsLatch) -> std::optional<bool> { 894fe6060f1SDimitry Andric if (CompletelyUnroll) { 895fe6060f1SDimitry Andric if (PreserveOnlyFirst) { 896fe6060f1SDimitry Andric if (i == 0) 897bdd1243dSDimitry Andric return std::nullopt; 898fe6060f1SDimitry Andric return j == 0; 899fe6060f1SDimitry Andric } 900fe6060f1SDimitry Andric // Complete (but possibly inexact) unrolling 901fe6060f1SDimitry Andric if (j == 0) 902fe6060f1SDimitry Andric return true; 903fe6060f1SDimitry Andric if (Info.TripCount && j != Info.TripCount) 904fe6060f1SDimitry Andric return false; 905bdd1243dSDimitry Andric return std::nullopt; 906fe6060f1SDimitry Andric } 907fe6060f1SDimitry Andric 908fe6060f1SDimitry Andric if (ULO.Runtime) { 909fe6060f1SDimitry Andric // If runtime unrolling inserts a prologue, information about non-latch 910fe6060f1SDimitry Andric // exits may be stale. 911fe6060f1SDimitry Andric if (IsLatch && j != 0) 912fe6060f1SDimitry Andric return false; 913bdd1243dSDimitry Andric return std::nullopt; 914fe6060f1SDimitry Andric } 915fe6060f1SDimitry Andric 916fe6060f1SDimitry Andric if (j != Info.BreakoutTrip && 917fe6060f1SDimitry Andric (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) { 918fe6060f1SDimitry Andric // If we know the trip count or a multiple of it, we can safely use an 919fe6060f1SDimitry Andric // unconditional branch for some iterations. 920fe6060f1SDimitry Andric return false; 921fe6060f1SDimitry Andric } 922bdd1243dSDimitry Andric return std::nullopt; 923fe6060f1SDimitry Andric }; 924fe6060f1SDimitry Andric 925fe6060f1SDimitry Andric // Fold branches for iterations where we know that they will exit or not 926fe6060f1SDimitry Andric // exit. 927bdd1243dSDimitry Andric for (auto &Pair : ExitInfos) { 928bdd1243dSDimitry Andric ExitInfo &Info = Pair.second; 929fe6060f1SDimitry Andric for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) { 930fe6060f1SDimitry Andric // The branch destination. 931fe6060f1SDimitry Andric unsigned j = (i + 1) % e; 932fe6060f1SDimitry Andric bool IsLatch = Pair.first == LatchBlock; 933bdd1243dSDimitry Andric std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch); 934bdd1243dSDimitry Andric if (!KnownWillExit) { 935bdd1243dSDimitry Andric if (!Info.FirstExitingBlock) 936bdd1243dSDimitry Andric Info.FirstExitingBlock = Info.ExitingBlocks[i]; 937fe6060f1SDimitry Andric continue; 938bdd1243dSDimitry Andric } 939fe6060f1SDimitry Andric 940fe6060f1SDimitry Andric // We don't fold known-exiting branches for non-latch exits here, 941fe6060f1SDimitry Andric // because this ensures that both all loop blocks and all exit blocks 942fe6060f1SDimitry Andric // remain reachable in the CFG. 943fe6060f1SDimitry Andric // TODO: We could fold these branches, but it would require much more 944fe6060f1SDimitry Andric // sophisticated updates to LoopInfo. 945bdd1243dSDimitry Andric if (*KnownWillExit && !IsLatch) { 946bdd1243dSDimitry Andric if (!Info.FirstExitingBlock) 947bdd1243dSDimitry Andric Info.FirstExitingBlock = Info.ExitingBlocks[i]; 948fe6060f1SDimitry Andric continue; 949bdd1243dSDimitry Andric } 950fe6060f1SDimitry Andric 951fe6060f1SDimitry Andric SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue); 952fe6060f1SDimitry Andric } 953fe6060f1SDimitry Andric } 954fe6060f1SDimitry Andric 955bdd1243dSDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 956bdd1243dSDimitry Andric DomTreeUpdater *DTUToUse = &DTU; 957bdd1243dSDimitry Andric if (ExitingBlocks.size() == 1 && ExitInfos.size() == 1) { 958bdd1243dSDimitry Andric // Manually update the DT if there's a single exiting node. In that case 959bdd1243dSDimitry Andric // there's a single exit node and it is sufficient to update the nodes 960bdd1243dSDimitry Andric // immediately dominated by the original exiting block. They will become 961bdd1243dSDimitry Andric // dominated by the first exiting block that leaves the loop after 962bdd1243dSDimitry Andric // unrolling. Note that the CFG inside the loop does not change, so there's 963bdd1243dSDimitry Andric // no need to update the DT inside the unrolled loop. 964bdd1243dSDimitry Andric DTUToUse = nullptr; 965bdd1243dSDimitry Andric auto &[OriginalExit, Info] = *ExitInfos.begin(); 966bdd1243dSDimitry Andric if (!Info.FirstExitingBlock) 967bdd1243dSDimitry Andric Info.FirstExitingBlock = Info.ExitingBlocks.back(); 968bdd1243dSDimitry Andric for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) { 969bdd1243dSDimitry Andric if (L->contains(C->getBlock())) 970bdd1243dSDimitry Andric continue; 971bdd1243dSDimitry Andric C->setIDom(DT->getNode(Info.FirstExitingBlock)); 972bdd1243dSDimitry Andric } 973bdd1243dSDimitry Andric } else { 974bdd1243dSDimitry Andric DTU.applyUpdates(DTUpdates); 975bdd1243dSDimitry Andric } 976bdd1243dSDimitry Andric 977fe6060f1SDimitry Andric // When completely unrolling, the last latch becomes unreachable. 978bdd1243dSDimitry Andric if (!LatchIsExiting && CompletelyUnroll) { 979bdd1243dSDimitry Andric // There is no need to update the DT here, because there must be a unique 980bdd1243dSDimitry Andric // latch. Hence if the latch is not exiting it must directly branch back to 981bdd1243dSDimitry Andric // the original loop header and does not dominate any nodes. 982bdd1243dSDimitry Andric assert(LatchBlock->getSingleSuccessor() && "Loop with multiple latches?"); 983bdd1243dSDimitry Andric changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA); 984bdd1243dSDimitry Andric } 985fe6060f1SDimitry Andric 9860b57cec5SDimitry Andric // Merge adjacent basic blocks, if possible. 9870b57cec5SDimitry Andric for (BasicBlock *Latch : Latches) { 9880b57cec5SDimitry Andric BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); 9890b57cec5SDimitry Andric assert((Term || 9900b57cec5SDimitry Andric (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) && 9910b57cec5SDimitry Andric "Need a branch as terminator, except when fully unrolling with " 9920b57cec5SDimitry Andric "unconditional latch"); 9930b57cec5SDimitry Andric if (Term && Term->isUnconditional()) { 9940b57cec5SDimitry Andric BasicBlock *Dest = Term->getSuccessor(0); 9950b57cec5SDimitry Andric BasicBlock *Fold = Dest->getUniquePredecessor(); 996bdd1243dSDimitry Andric if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI, 997bdd1243dSDimitry Andric /*MSSAU=*/nullptr, /*MemDep=*/nullptr, 998bdd1243dSDimitry Andric /*PredecessorWithTwoSuccessors=*/false, 999bdd1243dSDimitry Andric DTUToUse ? nullptr : DT)) { 10000b57cec5SDimitry Andric // Dest has been folded into Fold. Update our worklists accordingly. 10010b57cec5SDimitry Andric std::replace(Latches.begin(), Latches.end(), Dest, Fold); 10025f757f3fSDimitry Andric llvm::erase(UnrolledLoopBlocks, Dest); 10030b57cec5SDimitry Andric } 10040b57cec5SDimitry Andric } 10050b57cec5SDimitry Andric } 1006bdd1243dSDimitry Andric 1007bdd1243dSDimitry Andric if (DTUToUse) { 10088bcb0991SDimitry Andric // Apply updates to the DomTree. 10098bcb0991SDimitry Andric DT = &DTU.getDomTree(); 1010bdd1243dSDimitry Andric } 101104eeddc0SDimitry Andric assert(!UnrollVerifyDomtree || 101204eeddc0SDimitry Andric DT->verify(DominatorTree::VerificationLevel::Fast)); 101304eeddc0SDimitry Andric 10140b57cec5SDimitry Andric // At this point, the code is well formed. We now simplify the unrolled loop, 10150b57cec5SDimitry Andric // doing constant propagation and dead code elimination as we go. 1016fe6060f1SDimitry Andric simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, 1017*0fca6ea1SDimitry Andric TTI, AA); 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric NumCompletelyUnrolled += CompletelyUnroll; 10200b57cec5SDimitry Andric ++NumUnrolled; 10210b57cec5SDimitry Andric 10220b57cec5SDimitry Andric Loop *OuterL = L->getParentLoop(); 10230b57cec5SDimitry Andric // Update LoopInfo if the loop is completely removed. 102406c3fb27SDimitry Andric if (CompletelyUnroll) { 10250b57cec5SDimitry Andric LI->erase(L); 102606c3fb27SDimitry Andric // We shouldn't try to use `L` anymore. 102706c3fb27SDimitry Andric L = nullptr; 102806c3fb27SDimitry Andric } else if (OriginalTripCount) { 102906c3fb27SDimitry Andric // Update the trip count. Note that the remainder has already logic 103006c3fb27SDimitry Andric // computing it in `UnrollRuntimeLoopRemainder`. 103106c3fb27SDimitry Andric setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count, 103206c3fb27SDimitry Andric EstimatedLoopInvocationWeight); 103306c3fb27SDimitry Andric } 10340b57cec5SDimitry Andric 103504eeddc0SDimitry Andric // LoopInfo should not be valid, confirm that. 103604eeddc0SDimitry Andric if (UnrollVerifyLoopInfo) 103704eeddc0SDimitry Andric LI->verify(*DT); 103804eeddc0SDimitry Andric 10390b57cec5SDimitry Andric // After complete unrolling most of the blocks should be contained in OuterL. 10400b57cec5SDimitry Andric // However, some of them might happen to be out of OuterL (e.g. if they 10410b57cec5SDimitry Andric // precede a loop exit). In this case we might need to insert PHI nodes in 10420b57cec5SDimitry Andric // order to preserve LCSSA form. 10430b57cec5SDimitry Andric // We don't need to check this if we already know that we need to fix LCSSA 10440b57cec5SDimitry Andric // form. 10450b57cec5SDimitry Andric // TODO: For now we just recompute LCSSA for the outer loop in this case, but 10460b57cec5SDimitry Andric // it should be possible to fix it in-place. 10470b57cec5SDimitry Andric if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) 10480b57cec5SDimitry Andric NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); 10490b57cec5SDimitry Andric 1050fe6060f1SDimitry Andric // Make sure that loop-simplify form is preserved. We want to simplify 10510b57cec5SDimitry Andric // at least one layer outside of the loop that was unrolled so that any 10520b57cec5SDimitry Andric // changes to the parent loop exposed by the unrolling are considered. 10530b57cec5SDimitry Andric if (OuterL) { 10540b57cec5SDimitry Andric // OuterL includes all loops for which we can break loop-simplify, so 10550b57cec5SDimitry Andric // it's sufficient to simplify only it (it'll recursively simplify inner 10560b57cec5SDimitry Andric // loops too). 10570b57cec5SDimitry Andric if (NeedToFixLCSSA) { 10580b57cec5SDimitry Andric // LCSSA must be performed on the outermost affected loop. The unrolled 10590b57cec5SDimitry Andric // loop's last loop latch is guaranteed to be in the outermost loop 10600b57cec5SDimitry Andric // after LoopInfo's been updated by LoopInfo::erase. 10610b57cec5SDimitry Andric Loop *LatchLoop = LI->getLoopFor(Latches.back()); 10620b57cec5SDimitry Andric Loop *FixLCSSALoop = OuterL; 10630b57cec5SDimitry Andric if (!FixLCSSALoop->contains(LatchLoop)) 10640b57cec5SDimitry Andric while (FixLCSSALoop->getParentLoop() != LatchLoop) 10650b57cec5SDimitry Andric FixLCSSALoop = FixLCSSALoop->getParentLoop(); 10660b57cec5SDimitry Andric 10670b57cec5SDimitry Andric formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); 10680b57cec5SDimitry Andric } else if (PreserveLCSSA) { 10690b57cec5SDimitry Andric assert(OuterL->isLCSSAForm(*DT) && 10700b57cec5SDimitry Andric "Loops should be in LCSSA form after loop-unroll."); 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric // TODO: That potentially might be compile-time expensive. We should try 10740b57cec5SDimitry Andric // to fix the loop-simplified form incrementally. 10750b57cec5SDimitry Andric simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); 10760b57cec5SDimitry Andric } else { 10770b57cec5SDimitry Andric // Simplify loops for which we might've broken loop-simplify form. 10780b57cec5SDimitry Andric for (Loop *SubLoop : LoopsToSimplify) 10790b57cec5SDimitry Andric simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); 10800b57cec5SDimitry Andric } 10810b57cec5SDimitry Andric 10820b57cec5SDimitry Andric return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled 10830b57cec5SDimitry Andric : LoopUnrollResult::PartiallyUnrolled; 10840b57cec5SDimitry Andric } 10850b57cec5SDimitry Andric 10860b57cec5SDimitry Andric /// Given an llvm.loop loop id metadata node, returns the loop hint metadata 10870b57cec5SDimitry Andric /// node with the given name (for example, "llvm.loop.unroll.count"). If no 10880b57cec5SDimitry Andric /// such metadata node exists, then nullptr is returned. 10890b57cec5SDimitry Andric MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { 10900b57cec5SDimitry Andric // First operand should refer to the loop id itself. 10910b57cec5SDimitry Andric assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 10920b57cec5SDimitry Andric assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 10930b57cec5SDimitry Andric 1094*0fca6ea1SDimitry Andric for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) { 1095*0fca6ea1SDimitry Andric MDNode *MD = dyn_cast<MDNode>(MDO); 10960b57cec5SDimitry Andric if (!MD) 10970b57cec5SDimitry Andric continue; 10980b57cec5SDimitry Andric 10990b57cec5SDimitry Andric MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 11000b57cec5SDimitry Andric if (!S) 11010b57cec5SDimitry Andric continue; 11020b57cec5SDimitry Andric 1103*0fca6ea1SDimitry Andric if (Name == S->getString()) 11040b57cec5SDimitry Andric return MD; 11050b57cec5SDimitry Andric } 11060b57cec5SDimitry Andric return nullptr; 11070b57cec5SDimitry Andric } 1108